diff --git a/.gitmodules b/.gitmodules
index b399bf7b94d..7d975031c54 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -67,10 +67,10 @@
 	url = https://github.com/ClickHouse-Extras/libgsasl.git
 [submodule "contrib/libcxx"]
 	path = contrib/libcxx
-	url = https://github.com/llvm-mirror/libcxx.git
+	url = https://github.com/ClickHouse-Extras/libcxx.git
 [submodule "contrib/libcxxabi"]
 	path = contrib/libcxxabi
-	url = https://github.com/llvm-mirror/libcxxabi.git
+	url = https://github.com/ClickHouse-Extras/libcxxabi.git
 [submodule "contrib/snappy"]
 	path = contrib/snappy
 	url = https://github.com/google/snappy
@@ -128,3 +128,6 @@
 [submodule "contrib/icu"]
 	path = contrib/icu
 	url = https://github.com/unicode-org/icu.git
+[submodule "contrib/libc-headers"]
+	path = contrib/libc-headers
+	url = https://github.com/ClickHouse-Extras/libc-headers.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 884667ce4fc..c6ae23c0955 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -222,7 +222,7 @@ else ()
     set(NOT_UNBUNDLED 1)
 endif ()
 
-# Using system libs can cause lot of warnings in includes.
+# Using system libs can cause a lot of warnings in includes (on macro expansion).
 if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
     option (NO_WERROR "Disable -Werror compiler option" ON)
 endif ()
@@ -352,7 +352,6 @@ if (ENABLE_TESTS)
 endif ()
 
 # Need to process before "contrib" dir:
-include (libs/libcommon/cmake/find_gperftools.cmake)
 include (libs/libcommon/cmake/find_jemalloc.cmake)
 include (libs/libcommon/cmake/find_cctz.cmake)
 include (libs/libmysqlxx/cmake/find_mysqlclient.cmake)
@@ -362,18 +361,6 @@ include (libs/libmysqlxx/cmake/find_mysqlclient.cmake)
 if (USE_JEMALLOC)
     message (STATUS "Link jemalloc: ${JEMALLOC_LIBRARIES}")
     set (MALLOC_LIBRARIES ${JEMALLOC_LIBRARIES})
-elseif (USE_TCMALLOC)
-    if (DEBUG_TCMALLOC AND NOT GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG)
-        message (FATAL_ERROR "Requested DEBUG_TCMALLOC but debug library is not found. You should install Google Perftools. Example: sudo apt-get install libgoogle-perftools-dev")
-    endif ()
-
-    if (DEBUG_TCMALLOC AND GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG)
-        message (STATUS "Link libtcmalloc_minimal_debug for testing: ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG}")
-        set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG})
-    else ()
-        message (STATUS "Link libtcmalloc_minimal: ${GPERFTOOLS_TCMALLOC_MINIMAL}")
-        set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL})
-    endif ()
 elseif (SANITIZE)
     message (STATUS "Will use ${SANITIZE} sanitizer.")
 elseif (OS_LINUX)
diff --git a/cmake/Modules/FindGperftools.cmake b/cmake/Modules/FindGperftools.cmake
deleted file mode 100644
index 1cb8d42343f..00000000000
--- a/cmake/Modules/FindGperftools.cmake
+++ /dev/null
@@ -1,61 +0,0 @@
-# https://github.com/vast-io/vast/blob/master/cmake/FindGperftools.cmake
-
-# Tries to find Gperftools.
-#
-# Usage of this module as follows:
-#
-#     find_package(Gperftools)
-#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-#  Gperftools_ROOT_DIR  Set this variable to the root installation of
-#                       Gperftools if the module has problems finding
-#                       the proper installation path.
-#
-# Variables defined by this module:
-#
-#  GPERFTOOLS_FOUND              System has Gperftools libs/headers
-#  GPERFTOOLS_LIBRARIES          The Gperftools libraries (tcmalloc & profiler)
-#  GPERFTOOLS_INCLUDE_DIR        The location of Gperftools headers
-
-find_library(GPERFTOOLS_TCMALLOC
-  NAMES tcmalloc
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_MINIMAL
-  NAMES tcmalloc_minimal
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG
-  NAMES tcmalloc_minimal_debug
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_PROFILER
-  NAMES profiler
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
-  NAMES tcmalloc_and_profiler
-  HINTS ${Gperftools_ROOT_DIR}/lib)
-
-find_path(GPERFTOOLS_INCLUDE_DIR
-  NAMES gperftools/heap-profiler.h
-  HINTS ${Gperftools_ROOT_DIR}/include)
-
-set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(
-  Gperftools
-  DEFAULT_MSG
-  GPERFTOOLS_LIBRARIES
-  GPERFTOOLS_INCLUDE_DIR)
-
-mark_as_advanced(
-  Gperftools_ROOT_DIR
-  GPERFTOOLS_TCMALLOC
-  GPERFTOOLS_PROFILER
-  GPERFTOOLS_TCMALLOC_AND_PROFILER
-  GPERFTOOLS_LIBRARIES
-  GPERFTOOLS_INCLUDE_DIR)
diff --git a/cmake/arch.cmake b/cmake/arch.cmake
index 79fe92c03e5..ec644b6fe77 100644
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@@ -19,6 +19,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
     set (ARCH_PPC64LE 1)
     # FIXME: move this check into tools.cmake
     if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
-        message(FATAL_ERROR "Only gcc-8 is supported for powerpc architecture")
+        message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture")
     endif ()
 endif ()
diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake
index 4f2430228d4..839b49e1889 100644
--- a/cmake/find/cxx.cmake
+++ b/cmake/find/cxx.cmake
@@ -1,7 +1,5 @@
-if (COMPILER_CLANG)
-    option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++" ON)
-    option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${NOT_UNBUNDLED})
-endif()
+option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++" ${NOT_UNBUNDLED})
+option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${NOT_UNBUNDLED})
 
 if (USE_LIBCXX)
     set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake
index 8dfd26ec581..7cb67d1a990 100644
--- a/cmake/find/llvm.cmake
+++ b/cmake/find/llvm.cmake
@@ -1,7 +1,7 @@
 # Broken in macos. TODO: update clang, re-test, enable
 if (NOT APPLE)
-    option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" ${ENABLE_LIBRARIES})
-    option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library. Default: system library for quicker developer builds." 0)
+    option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile_expressions' option for query execution" ${ENABLE_LIBRARIES})
+    option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library." ${NOT_UNBUNDLED})
 endif ()
 
 if (ENABLE_EMBEDDED_COMPILER)
@@ -13,27 +13,11 @@ if (ENABLE_EMBEDDED_COMPILER)
     if (NOT USE_INTERNAL_LLVM_LIBRARY)
         set (LLVM_PATHS "/usr/local/lib/llvm")
 
-        if (LLVM_VERSION)
-            find_package(LLVM ${LLVM_VERSION} CONFIG PATHS ${LLVM_PATHS})
-        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-            find_package(LLVM ${CMAKE_CXX_COMPILER_VERSION} CONFIG PATHS ${LLVM_PATHS})
-        else ()
-            # TODO: 9 8
-            foreach(llvm_v 7.1 7 6 5)
-                if (NOT LLVM_FOUND)
-                    find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS})
-                endif ()
-            endforeach ()
-        endif ()
-
-        if (LLVM_FOUND)
-            find_library (LLD_LIBRARY_TEST lldCore PATHS ${LLVM_LIBRARY_DIRS})
-            find_path (LLD_INCLUDE_DIR_TEST NAMES lld/Core/AbsoluteAtom.h PATHS ${LLVM_INCLUDE_DIRS})
-            if (NOT LLD_LIBRARY_TEST OR NOT LLD_INCLUDE_DIR_TEST)
-                set (LLVM_FOUND 0)
-                message(WARNING "liblld (${LLD_LIBRARY_TEST}, ${LLD_INCLUDE_DIR_TEST}) not found in ${LLVM_INCLUDE_DIRS} ${LLVM_LIBRARY_DIRS}. Disabling internal compiler.")
+        foreach(llvm_v 9 8)
+            if (NOT LLVM_FOUND)
+                find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS})
             endif ()
-        endif ()
+        endforeach ()
 
         if (LLVM_FOUND)
             # Remove dynamically-linked zlib and libedit from LLVM's dependencies:
@@ -51,30 +35,39 @@ if (ENABLE_EMBEDDED_COMPILER)
             set (LLVM_FOUND 0)
             set (USE_EMBEDDED_COMPILER 0)
         endif ()
-
-        # TODO: fix llvm 8+ and remove:
-        if (LLVM_FOUND AND LLVM_VERSION_MAJOR GREATER 7)
-            message(WARNING "LLVM 8+ not supported yet, disabling.")
-            set (USE_EMBEDDED_COMPILER 0)
-        endif ()
     else()
-        set (LLVM_FOUND 1)
-        set (USE_EMBEDDED_COMPILER 1)
-        set (LLVM_VERSION "7.0.0bundled")
-        set (LLVM_INCLUDE_DIRS
-            ${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include
-            ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include
-            ${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/include
-            ${ClickHouse_BINARY_DIR}/contrib/llvm/clang/include
-            ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/tools/clang/include
-            ${ClickHouse_SOURCE_DIR}/contrib/llvm/lld/include
-            ${ClickHouse_BINARY_DIR}/contrib/llvm/lld/include
-            ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/tools/lld/include)
-        set (LLVM_LIBRARY_DIRS ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm)
+        if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
+            message(WARNING "Option ENABLE_EMBEDDED_COMPILER is set but LLVM library cannot build if build directory is the same as source directory.")
+            set (LLVM_FOUND 0)
+            set (USE_EMBEDDED_COMPILER 0)
+        elseif (SPLIT_SHARED_LIBRARIES)
+            # llvm-tblgen cannot find the shared libraries that we build. Probably can be easily fixed.
+            message(WARNING "Option ENABLE_EMBEDDED_COMPILER is not compatible with SPLIT_SHARED_LIBRARIES. Build of LLVM will be disabled.")
+            set (LLVM_FOUND 0)
+            set (USE_EMBEDDED_COMPILER 0)
+        elseif (NOT ARCH_AMD64)
+            # It's not supported yet, but you can help.
+            message(WARNING "Option ENABLE_EMBEDDED_COMPILER is only available for x86_64. Build of LLVM will be disabled.")
+            set (LLVM_FOUND 0)
+            set (USE_EMBEDDED_COMPILER 0)
+        elseif (SANITIZE STREQUAL "undefined")
+            # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
+            message(WARNING "Option ENABLE_EMBEDDED_COMPILER does not work with UBSan, because 'llvm-tblgen' tool from LLVM has undefined behaviour. Build of LLVM will be disabled.")
+            set (LLVM_FOUND 0)
+            set (USE_EMBEDDED_COMPILER 0)
+        else ()
+            set (LLVM_FOUND 1)
+            set (USE_EMBEDDED_COMPILER 1)
+            set (LLVM_VERSION "9.0.0bundled")
+            set (LLVM_INCLUDE_DIRS
+                ${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include
+                ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include
+            )
+            set (LLVM_LIBRARY_DIRS ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm)
+        endif()
     endif()
 
     if (LLVM_FOUND)
-        message(STATUS "LLVM version: ${LLVM_PACKAGE_VERSION}")
         message(STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}")
         message(STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}")
         message(STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}")
@@ -82,16 +75,53 @@ if (ENABLE_EMBEDDED_COMPILER)
 endif()
 
 
-function(llvm_libs_all REQUIRED_LLVM_LIBRARIES)
-    llvm_map_components_to_libnames (result all)
-    if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result)
-        list (REMOVE_ITEM result "LTO" "LLVM")
-    else()
-        set (result "LLVM")
-    endif ()
-    if (TERMCAP_LIBRARY)
-        list (APPEND result ${TERMCAP_LIBRARY})
-    endif ()
-    list (APPEND result ${CMAKE_DL_LIBS} ${ZLIB_LIBRARIES})
-    set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE)
-endfunction()
+# This list was generated by listing all LLVM libraries, compiling the binary and removing libraries as long as the binary still compiles.
+set (REQUIRED_LLVM_LIBRARIES
+LLVMOrcJIT
+LLVMExecutionEngine
+LLVMRuntimeDyld
+LLVMX86CodeGen
+LLVMX86Desc
+LLVMX86Info
+LLVMX86Utils
+LLVMAsmPrinter
+LLVMDebugInfoDWARF
+LLVMGlobalISel
+LLVMSelectionDAG
+LLVMMCDisassembler
+LLVMPasses
+LLVMCodeGen
+LLVMipo
+LLVMBitWriter
+LLVMInstrumentation
+LLVMScalarOpts
+LLVMAggressiveInstCombine
+LLVMInstCombine
+LLVMVectorize
+LLVMTransformUtils
+LLVMTarget
+LLVMAnalysis
+LLVMProfileData
+LLVMObject
+LLVMBitReader
+LLVMCore
+LLVMRemarks
+LLVMBitstreamReader
+LLVMMCParser
+LLVMMC
+LLVMBinaryFormat
+LLVMDebugInfoCodeView
+LLVMSupport
+LLVMDemangle
+)
+
+#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES)
+#    llvm_map_components_to_libnames (result all)
+#    if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result)
+#        list (REMOVE_ITEM result "LTO" "LLVM")
+#    else()
+#        set (result "LLVM")
+#    endif ()
+#    list (APPEND result ${CMAKE_DL_LIBS} ${ZLIB_LIBRARIES})
+#    set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE)
+#endfunction()
diff --git a/cmake/find/s3.cmake b/cmake/find/s3.cmake
index ee565ceabe8..af53dc80feb 100644
--- a/cmake/find/s3.cmake
+++ b/cmake/find/s3.cmake
@@ -1,4 +1,4 @@
-if(NOT OS_FREEBSD AND NOT APPLE)
+if(NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_ARM)
     option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES})
 endif()
 
diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake
index ef1354628fe..6ecc3e96593 100644
--- a/cmake/linux/default_libs.cmake
+++ b/cmake/linux/default_libs.cmake
@@ -18,6 +18,14 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
 set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
 set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
 
+# The glibc-compatibility library relies on a fixed version of the libc headers
+# (because minor changes in function attributes between different glibc versions would introduce incompatibilities).
+# This is for x86_64. For other architectures we have separate toolchains.
+if (ARCH_AMD64)
+    set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
+    set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
+endif ()
+
 # Global libraries
 
 add_library(global-libs INTERFACE)
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 8323961202e..cb099ade7f5 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -76,6 +76,9 @@ if (SANITIZE)
             set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
         endif ()
 
+        # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
+        set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
+
     elseif (SANITIZE STREQUAL "libfuzzer")
         # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
         # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them (tests) have entry point for fuzzer and it's not checked.
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 08bbe299549..415d3a88703 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -66,34 +66,19 @@ if (USE_INTERNAL_ZLIB_LIBRARY)
     endif ()
 
     add_subdirectory (${INTERNAL_ZLIB_NAME})
-    # TODO: make pull to Dead2/zlib-ng and remove:
     # We should use same defines when including zlib.h as used when zlib compiled
     target_compile_definitions (zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
     target_compile_definitions (zlibstatic PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
-    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64")
+    if (ARCH_AMD64 OR ARCH_AARCH64)
        target_compile_definitions (zlib PUBLIC X86_64 UNALIGNED_OK)
        target_compile_definitions (zlibstatic PUBLIC X86_64 UNALIGNED_OK)
     endif ()
-
-    #set_target_properties(example PROPERTIES EXCLUDE_FROM_ALL 1)
-    #if (TARGET example64)
-    #    set_target_properties(example64 PROPERTIES EXCLUDE_FROM_ALL 1)
-    #endif ()
-
-    #set_target_properties(minigzip PROPERTIES EXCLUDE_FROM_ALL 1)
-    #if (TARGET minigzip64)
-    #    set_target_properties(minigzip64 PROPERTIES EXCLUDE_FROM_ALL 1)
-    #endif ()
 endif ()
 
 if (USE_INTERNAL_CCTZ_LIBRARY)
     add_subdirectory (cctz-cmake)
 endif ()
 
-if (ENABLE_TCMALLOC AND USE_INTERNAL_GPERFTOOLS_LIBRARY)
-    add_subdirectory (libtcmalloc)
-endif ()
-
 if (ENABLE_JEMALLOC AND USE_INTERNAL_JEMALLOC_LIBRARY)
     add_subdirectory (jemalloc-cmake)
 endif ()
@@ -175,10 +160,7 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
     set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
     set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
     set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
-    #set (BOOST_INCLUDEDIR Boost_INCLUDE_DIRS)
     set (Boost_FOUND 1 CACHE INTERNAL "")
-    #set (ZLIB_HOME ${ZLIB_INCLUDE_DIR})
-    #set (ZLIB_FOUND 1)
     if (MAKE_STATIC_LIBRARIES)
         set (PARQUET_ARROW_LINKAGE "static" CACHE INTERNAL "")
         set (ARROW_TEST_LINKAGE "static" CACHE INTERNAL "")
@@ -218,6 +200,11 @@ else()
     endif()
 
     add_subdirectory(arrow-cmake)
+
+    # The library is large - build it without debug info (-g0) to avoid bloat.
+    target_compile_options (${ARROW_LIBRARY} PRIVATE -g0)
+    target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
+    target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
 endif()
 endif()
 
@@ -254,28 +241,14 @@ elseif(GTEST_SRC_DIR)
     target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0)
 endif()
 
-if (USE_INTERNAL_LLVM_LIBRARY)
-    file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/empty.cpp CONTENT " ")
-    add_library(LLVM0 ${CMAKE_CURRENT_BINARY_DIR}/empty.cpp) # silly cmake bug fix
-    add_library(LLVMOFF ${CMAKE_CURRENT_BINARY_DIR}/empty.cpp)
+if (USE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY)
     # ld: unknown option: --color-diagnostics
     if (APPLE)
         set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
     endif ()
     set (LLVM_ENABLE_EH 1 CACHE INTERNAL "")
     set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
-    set (LLVM_INCLUDE_TESTS 0 CACHE INTERNAL "")
-    set (LLVM_INCLUDE_EXAMPLES 0 CACHE INTERNAL "")
-    set (LLVM_INCLUDE_TOOLS 0 CACHE INTERNAL "")
-    set (LLVM_INSTALL_TOOLCHAIN_ONLY 0 CACHE INTERNAL "")
-    set (CLANG_BUILT_STANDALONE 0 CACHE INTERNAL "")
-    set (LLDB_BUILT_STANDALONE 0 CACHE INTERNAL "")
-    set (CLANG_ENABLE_STATIC_ANALYZER 0 CACHE INTERNAL "")
-    set (CLANG_ENABLE_ARCMT 0 CACHE INTERNAL "")
-    set (CLANG_BUILD_TOOLS 0 CACHE INTERNAL "")
-    set (BENCHMARK_ENABLE_GTEST_TESTS 0 CACHE INTERNAL "")
-    set (BENCHMARK_ENABLE_ASSEMBLY_TESTS 0 CACHE INTERNAL "")
-    set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE INTERNAL "")
+    set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "")
     add_subdirectory (llvm/llvm)
 endif ()
 
@@ -320,6 +293,11 @@ if (USE_INTERNAL_AWS_S3_LIBRARY)
     set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS})
     set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH})
     add_subdirectory(aws-s3-cmake)
+
+    # The library is large - build it without debug info (-g0) to avoid bloat.
+    target_compile_options (aws_s3 PRIVATE -g0)
+    target_compile_options (aws_s3_checksums PRIVATE -g0)
+    target_compile_options (libcurl PRIVATE -g0)
 endif ()
 
 if (USE_BASE64)
@@ -328,6 +306,13 @@ endif()
 
 if (USE_INTERNAL_HYPERSCAN_LIBRARY)
     add_subdirectory (hyperscan)
+
+    # The library is large - build it without debug info (-g0) to avoid bloat.
+    if (USE_STATIC_LIBRARIES)
+        target_compile_options (hs PRIVATE -g0)
+    else ()
+        target_compile_options (hs_shared PRIVATE -g0)
+    endif ()
 endif()
 
 if (USE_SIMDJSON)
@@ -341,7 +326,3 @@ endif()
 if (USE_FASTOPS)
     add_subdirectory (fastops-cmake)
 endif()
-
-#if (USE_INTERNAL_ORC_LIBRARY)
-#    add_subdirectory(orc-cmake)
-#endif ()
diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt
index b1387278c71..c54b4e8eae5 100644
--- a/contrib/capnproto-cmake/CMakeLists.txt
+++ b/contrib/capnproto-cmake/CMakeLists.txt
@@ -54,17 +54,6 @@ set_target_properties(capnp
                       )
 target_link_libraries(capnp PUBLIC kj)
 
-# The library has substandard code
-if (COMPILER_GCC)
-    set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-maybe-uninitialized
-        -Wno-deprecated-declarations -Wno-class-memaccess)
-elseif (COMPILER_CLANG)
-    set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-deprecated-declarations)
-endif ()
-
-target_compile_options(kj PRIVATE ${SUPPRESS_WARNINGS})
-target_compile_options(capnp PRIVATE ${SUPPRESS_WARNINGS})
-
 set (CAPNPC_SRCS
     ${CAPNPROTO_SOURCE_DIR}/capnp/compiler/type-id.c++
     ${CAPNPROTO_SOURCE_DIR}/capnp/compiler/error-reporter.c++
@@ -80,3 +69,15 @@ set (CAPNPC_SRCS
 
 add_library(capnpc ${CAPNPC_SRCS})
 target_link_libraries(capnpc PUBLIC capnp)
+
+# The library has substandard code
+if (COMPILER_GCC)
+    set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-maybe-uninitialized
+        -Wno-deprecated-declarations -Wno-class-memaccess)
+elseif (COMPILER_CLANG)
+    set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-deprecated-declarations)
+endif ()
+
+target_compile_options(kj PRIVATE ${SUPPRESS_WARNINGS})
+target_compile_options(capnp PRIVATE ${SUPPRESS_WARNINGS})
+target_compile_options(capnpc PRIVATE ${SUPPRESS_WARNINGS})
diff --git a/contrib/croaring/CMakeLists.txt b/contrib/croaring/CMakeLists.txt
index eeffb1e0a34..da19911487f 100644
--- a/contrib/croaring/CMakeLists.txt
+++ b/contrib/croaring/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_library(roaring
-	roaring.c
-	roaring/roaring.h
-	roaring/roaring.hh)
+    roaring.c
+    roaring/roaring.h
+    roaring/roaring.hh)
 
-target_include_directories (roaring PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories (roaring SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/contrib/libbtrie/CMakeLists.txt b/contrib/libbtrie/CMakeLists.txt
index f590520c416..2b0c8e3fd75 100644
--- a/contrib/libbtrie/CMakeLists.txt
+++ b/contrib/libbtrie/CMakeLists.txt
@@ -3,9 +3,4 @@ add_library(btrie
     include/btrie.h
 )
 
-target_include_directories (btrie PUBLIC include)
-
-if (ENABLE_TESTS)
-    add_executable(test_btrie test/test_btrie.c)
-    target_link_libraries(test_btrie btrie)
-endif ()
+target_include_directories (btrie SYSTEM PUBLIC include)
diff --git a/contrib/libc-headers b/contrib/libc-headers
new file mode 160000
index 00000000000..cd82fd9d8ee
--- /dev/null
+++ b/contrib/libc-headers
@@ -0,0 +1 @@
+Subproject commit cd82fd9d8eefe50a47a0adf7c617c3ea7d558d11
diff --git a/contrib/libcxx b/contrib/libcxx
index 9807685d51d..f7c63235238 160000
--- a/contrib/libcxx
+++ b/contrib/libcxx
@@ -1 +1 @@
-Subproject commit 9807685d51db467e097ad5eb8d5c2c16922794b2
+Subproject commit f7c63235238a71b7e0563fab8c7c5ec1b54831f6
diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt
index a654ce59d6a..ee5fe625079 100644
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@@ -1,41 +1,45 @@
+include(CheckCXXCompilerFlag)
+
 set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
 
 set(SRCS
-${LIBCXX_SOURCE_DIR}/src/optional.cpp
-${LIBCXX_SOURCE_DIR}/src/variant.cpp
-${LIBCXX_SOURCE_DIR}/src/chrono.cpp
-${LIBCXX_SOURCE_DIR}/src/thread.cpp
-${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp
-${LIBCXX_SOURCE_DIR}/src/iostream.cpp
-${LIBCXX_SOURCE_DIR}/src/strstream.cpp
-${LIBCXX_SOURCE_DIR}/src/ios.cpp
-${LIBCXX_SOURCE_DIR}/src/future.cpp
-${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
-${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp
-${LIBCXX_SOURCE_DIR}/src/hash.cpp
-${LIBCXX_SOURCE_DIR}/src/string.cpp
-${LIBCXX_SOURCE_DIR}/src/debug.cpp
-${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp
-${LIBCXX_SOURCE_DIR}/src/utility.cpp
-${LIBCXX_SOURCE_DIR}/src/any.cpp
-${LIBCXX_SOURCE_DIR}/src/exception.cpp
-${LIBCXX_SOURCE_DIR}/src/memory.cpp
-${LIBCXX_SOURCE_DIR}/src/new.cpp
-${LIBCXX_SOURCE_DIR}/src/valarray.cpp
-${LIBCXX_SOURCE_DIR}/src/vector.cpp
 ${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
-${LIBCXX_SOURCE_DIR}/src/functional.cpp
-${LIBCXX_SOURCE_DIR}/src/regex.cpp
+${LIBCXX_SOURCE_DIR}/src/any.cpp
 ${LIBCXX_SOURCE_DIR}/src/bind.cpp
-${LIBCXX_SOURCE_DIR}/src/mutex.cpp
 ${LIBCXX_SOURCE_DIR}/src/charconv.cpp
-${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp
-${LIBCXX_SOURCE_DIR}/src/locale.cpp
-${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp
-${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp
+${LIBCXX_SOURCE_DIR}/src/chrono.cpp
+${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp
+${LIBCXX_SOURCE_DIR}/src/condition_variable_destructor.cpp
+${LIBCXX_SOURCE_DIR}/src/debug.cpp
+${LIBCXX_SOURCE_DIR}/src/exception.cpp
+${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp
 ${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp
-${LIBCXX_SOURCE_DIR}/src/system_error.cpp
+${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp
+${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp
+${LIBCXX_SOURCE_DIR}/src/functional.cpp
+${LIBCXX_SOURCE_DIR}/src/future.cpp
+${LIBCXX_SOURCE_DIR}/src/hash.cpp
+${LIBCXX_SOURCE_DIR}/src/ios.cpp
+${LIBCXX_SOURCE_DIR}/src/iostream.cpp
+${LIBCXX_SOURCE_DIR}/src/locale.cpp
+${LIBCXX_SOURCE_DIR}/src/memory.cpp
+${LIBCXX_SOURCE_DIR}/src/mutex.cpp
+${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
+${LIBCXX_SOURCE_DIR}/src/new.cpp
+${LIBCXX_SOURCE_DIR}/src/optional.cpp
 ${LIBCXX_SOURCE_DIR}/src/random.cpp
+${LIBCXX_SOURCE_DIR}/src/regex.cpp
+${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
+${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp
+${LIBCXX_SOURCE_DIR}/src/string.cpp
+${LIBCXX_SOURCE_DIR}/src/strstream.cpp
+${LIBCXX_SOURCE_DIR}/src/system_error.cpp
+${LIBCXX_SOURCE_DIR}/src/thread.cpp
+${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp
+${LIBCXX_SOURCE_DIR}/src/utility.cpp
+${LIBCXX_SOURCE_DIR}/src/valarray.cpp
+${LIBCXX_SOURCE_DIR}/src/variant.cpp
+${LIBCXX_SOURCE_DIR}/src/vector.cpp
 )
 
 add_library(cxx ${SRCS})
@@ -43,8 +47,15 @@ add_library(cxx ${SRCS})
 target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
 
-target_compile_options(cxx PUBLIC -nostdinc++ -Wno-reserved-id-macro)
-if (OS_DARWIN AND (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11))
+target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)
+
+check_cxx_compiler_flag(-Wreserved-id-macro HAVE_WARNING_RESERVED_ID_MACRO)
+if (HAVE_WARNING_RESERVED_ID_MACRO)
+    target_compile_options(cxx PUBLIC -Wno-reserved-id-macro)
+endif ()
+
+check_cxx_compiler_flag(-Wctad-maybe-unsupported HAVE_WARNING_CTAD_MAYBE_UNSUPPORTED)
+if (HAVE_WARNING_CTAD_MAYBE_UNSUPPORTED)
     target_compile_options(cxx PUBLIC -Wno-ctad-maybe-unsupported)
 endif ()
 
diff --git a/contrib/libcxxabi b/contrib/libcxxabi
index d56efcc7a52..c26cf36f838 160000
--- a/contrib/libcxxabi
+++ b/contrib/libcxxabi
@@ -1 +1 @@
-Subproject commit d56efcc7a52739518dbe7df9e743073e00951fa1
+Subproject commit c26cf36f8387c5edf2cabb4a630f0975c35aa9fb
diff --git a/contrib/libtcmalloc/AUTHORS b/contrib/libtcmalloc/AUTHORS
deleted file mode 100644
index 3995ed4cf57..00000000000
--- a/contrib/libtcmalloc/AUTHORS
+++ /dev/null
@@ -1,2 +0,0 @@
-google-perftools@googlegroups.com
-
diff --git a/contrib/libtcmalloc/CMakeLists.txt b/contrib/libtcmalloc/CMakeLists.txt
deleted file mode 100644
index d7f52e1d384..00000000000
--- a/contrib/libtcmalloc/CMakeLists.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-message (STATUS "Building: tcmalloc_minimal_internal")
-
-add_library (tcmalloc_minimal_internal
-./src/malloc_hook.cc
-./src/base/spinlock_internal.cc
-./src/base/spinlock.cc
-./src/base/dynamic_annotations.c
-./src/base/linuxthreads.cc
-./src/base/elf_mem_image.cc
-./src/base/vdso_support.cc
-./src/base/sysinfo.cc
-./src/base/low_level_alloc.cc
-./src/base/thread_lister.c
-./src/base/logging.cc
-./src/base/atomicops-internals-x86.cc
-./src/memfs_malloc.cc
-./src/tcmalloc.cc
-./src/malloc_extension.cc
-./src/thread_cache.cc
-./src/symbolize.cc
-./src/page_heap.cc
-./src/maybe_threads.cc
-./src/central_freelist.cc
-./src/static_vars.cc
-./src/sampler.cc
-./src/internal_logging.cc
-./src/system-alloc.cc
-./src/span.cc
-./src/common.cc
-./src/stacktrace.cc
-./src/stack_trace_table.cc
-./src/heap-checker.cc
-./src/heap-checker-bcad.cc
-./src/heap-profile-table.cc
-./src/raw_printer.cc
-./src/memory_region_map.cc
-)
-
-
-target_compile_options (tcmalloc_minimal_internal
-    PRIVATE
-    -DNO_TCMALLOC_SAMPLES
-    -DNDEBUG
-    -DNO_FRAME_POINTER
-    -Wwrite-strings
-    -Wno-sign-compare
-    -Wno-unused-result
-    -Wno-deprecated-declarations
-    -Wno-unused-function
-    -Wno-unused-private-field
-
-    PUBLIC
-    -fno-builtin-malloc
-    -fno-builtin-free
-    -fno-builtin-realloc
-    -fno-builtin-calloc
-    -fno-builtin-cfree
-    -fno-builtin-memalign
-    -fno-builtin-posix_memalign
-    -fno-builtin-valloc
-    -fno-builtin-pvalloc
-)
-
-if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.1)
-    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-dynamic-exception-spec )
-endif ()
-
-if (CMAKE_SYSTEM MATCHES "FreeBSD" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    target_compile_options(tcmalloc_minimal_internal PUBLIC -Wno-unused-but-set-variable)
-endif ()
-
-if (CMAKE_SYSTEM MATCHES "FreeBSD")
-    target_compile_definitions(tcmalloc_minimal_internal PUBLIC _GNU_SOURCE)
-endif ()
-
-target_include_directories (tcmalloc_minimal_internal PUBLIC include)
-target_include_directories (tcmalloc_minimal_internal PRIVATE src)
-
-find_package (Threads)
-target_link_libraries (tcmalloc_minimal_internal ${CMAKE_THREAD_LIBS_INIT})
diff --git a/contrib/libtcmalloc/COPYING b/contrib/libtcmalloc/COPYING
deleted file mode 100644
index e4956cfd9fd..00000000000
--- a/contrib/libtcmalloc/COPYING
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright (c) 2005, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-    * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/libtcmalloc/README b/contrib/libtcmalloc/README
deleted file mode 100644
index 8f7377c0b3b..00000000000
--- a/contrib/libtcmalloc/README
+++ /dev/null
@@ -1,8 +0,0 @@
-https://github.com/gperftools/gperftools/commit/dde32f8bbc95312379f9f5a651799815bb6327c5
-
-Several modifications:
-1. Disabled TCMALLOC_AGGRESSIVE_DECOMMIT by default. It is important.
-2. Using only files for tcmalloc_minimal build (./configure --enable-minimal).
-3. Using some compiler flags from project.
-4. Removed warning about unused variable when build with NDEBUG (by default).
-5. Including config.h with relative path.
diff --git a/contrib/libtcmalloc/include/gperftools/heap-checker.h b/contrib/libtcmalloc/include/gperftools/heap-checker.h
deleted file mode 100644
index 5a87d8da7f7..00000000000
--- a/contrib/libtcmalloc/include/gperftools/heap-checker.h
+++ /dev/null
@@ -1,422 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat)
-//
-//
-// Module for detecing heap (memory) leaks.
-//
-// For full(er) information, see doc/heap_checker.html
-//
-// This module can be linked into programs with
-// no slowdown caused by this unless you activate the leak-checker:
-//
-//    1. Set the environment variable HEAPCHEK to _type_ before
-//       running the program.
-//
-// _type_ is usually "normal" but can also be "minimal", "strict", or
-// "draconian".  (See the html file for other options, like 'local'.)
-//
-// After that, just run your binary.  If the heap-checker detects
-// a memory leak at program-exit, it will print instructions on how
-// to track down the leak.
-
-#ifndef BASE_HEAP_CHECKER_H_
-#define BASE_HEAP_CHECKER_H_
-
-#include <sys/types.h>  // for size_t
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>     // for uintptr_t
-#endif
-#include <stdarg.h>     // for va_list
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-
-// The class is thread-safe with respect to all the provided static methods,
-// as well as HeapLeakChecker objects: they can be accessed by multiple threads.
-class PERFTOOLS_DLL_DECL HeapLeakChecker {
- public:
-
-  // ----------------------------------------------------------------------- //
-  // Static functions for working with (whole-program) leak checking.
-
-  // If heap leak checking is currently active in some mode
-  // e.g. if leak checking was started (and is still active now)
-  // due to HEAPCHECK=... defined in the environment.
-  // The return value reflects iff HeapLeakChecker objects manually
-  // constructed right now will be doing leak checking or nothing.
-  // Note that we can go from active to inactive state during InitGoogle()
-  // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
-  static bool IsActive();
-
-  // Return pointer to the whole-program checker if it has been created
-  // and NULL otherwise.
-  // Once GlobalChecker() returns non-NULL that object will not disappear and
-  // will be returned by all later GlobalChecker calls.
-  // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below)
-  // for the whole-program checker after one calls NoGlobalLeaks()
-  // or similar and gets false.
-  static HeapLeakChecker* GlobalChecker();
-
-  // Do whole-program leak check now (if it was activated for this binary);
-  // return false only if it was activated and has failed.
-  // The mode of the check is controlled by the command-line flags.
-  // This method can be called repeatedly.
-  // Things like GlobalChecker()->SameHeap() can also be called explicitly
-  // to do the desired flavor of the check.
-  static bool NoGlobalLeaks();
-
-  // If whole-program checker if active,
-  // cancel its automatic execution after main() exits.
-  // This requires that some leak check (e.g. NoGlobalLeaks())
-  // has been called at least once on the whole-program checker.
-  static void CancelGlobalCheck();
-
-  // ----------------------------------------------------------------------- //
-  // Non-static functions for starting and doing leak checking.
-
-  // Start checking and name the leak check performed.
-  // The name is used in naming dumped profiles
-  // and needs to be unique only within your binary.
-  // It must also be a string that can be a part of a file name,
-  // in particular not contain path expressions.
-  explicit HeapLeakChecker(const char *name);
-
-  // Destructor (verifies that some *NoLeaks or *SameHeap method
-  // has been called at least once).
-  ~HeapLeakChecker();
-
-  // These used to be different but are all the same now: they return
-  // true iff all memory allocated since this HeapLeakChecker object
-  // was constructor is still reachable from global state.
-  //
-  // Because we fork to convert addresses to symbol-names, and forking
-  // is not thread-safe, and we may be called in a threaded context,
-  // we do not try to symbolize addresses when called manually.
-  bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
-
-  // These forms are obsolete; use NoLeaks() instead.
-  // TODO(csilvers): mark as DEPRECATED.
-  bool QuickNoLeaks()  { return NoLeaks(); }
-  bool BriefNoLeaks()  { return NoLeaks(); }
-  bool SameHeap()      { return NoLeaks(); }
-  bool QuickSameHeap() { return NoLeaks(); }
-  bool BriefSameHeap() { return NoLeaks(); }
-
-  // Detailed information about the number of leaked bytes and objects
-  // (both of these can be negative as well).
-  // These are available only after a *SameHeap or *NoLeaks
-  // method has been called.
-  // Note that it's possible for both of these to be zero
-  // while SameHeap() or NoLeaks() returned false in case
-  // of a heap state change that is significant
-  // but preserves the byte and object counts.
-  ssize_t BytesLeaked() const;
-  ssize_t ObjectsLeaked() const;
-
-  // ----------------------------------------------------------------------- //
-  // Static helpers to make us ignore certain leaks.
-
-  // Scoped helper class.  Should be allocated on the stack inside a
-  // block of code.  Any heap allocations done in the code block
-  // covered by the scoped object (including in nested function calls
-  // done by the code block) will not be reported as leaks.  This is
-  // the recommended replacement for the GetDisableChecksStart() and
-  // DisableChecksToHereFrom() routines below.
-  //
-  // Example:
-  //   void Foo() {
-  //     HeapLeakChecker::Disabler disabler;
-  //     ... code that allocates objects whose leaks should be ignored ...
-  //   }
-  //
-  // REQUIRES: Destructor runs in same thread as constructor
-  class Disabler {
-   public:
-    Disabler();
-    ~Disabler();
-   private:
-    Disabler(const Disabler&);        // disallow copy
-    void operator=(const Disabler&);  // and assign
-  };
-
-  // Ignore an object located at 'ptr' (can go at the start or into the object)
-  // as well as all heap objects (transitively) referenced from it for the
-  // purposes of heap leak checking. Returns 'ptr' so that one can write
-  //   static T* obj = IgnoreObject(new T(...));
-  //
-  // If 'ptr' does not point to an active allocated object at the time of this
-  // call, it is ignored; but if it does, the object must not get deleted from
-  // the heap later on.
-  //
-  // See also HiddenPointer, below, if you need to prevent a pointer from
-  // being traversed by the heap checker but do not wish to transitively
-  // whitelist objects referenced through it.
-  template <typename T>
-  static T* IgnoreObject(T* ptr) {
-    DoIgnoreObject(static_cast<const void*>(const_cast<const T*>(ptr)));
-    return ptr;
-  }
-
-  // Undo what an earlier IgnoreObject() call promised and asked to do.
-  // At the time of this call 'ptr' must point at or inside of an active
-  // allocated object which was previously registered with IgnoreObject().
-  static void UnIgnoreObject(const void* ptr);
-
-  // ----------------------------------------------------------------------- //
-  // Internal types defined in .cc
-
-  class Allocator;
-  struct RangeValue;
-
- private:
-
-  // ----------------------------------------------------------------------- //
-  // Various helpers
-
-  // Create the name of the heap profile file.
-  // Should be deleted via Allocator::Free().
-  char* MakeProfileNameLocked();
-
-  // Helper for constructors
-  void Create(const char *name, bool make_start_snapshot);
-
-  enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE };
-
-  // Helper for *NoLeaks and *SameHeap
-  bool DoNoLeaks(ShouldSymbolize should_symbolize);
-
-  // Helper for NoGlobalLeaks, also called by the global destructor.
-  static bool NoGlobalLeaksMaybeSymbolize(ShouldSymbolize should_symbolize);
-
-  // These used to be public, but they are now deprecated.
-  // Will remove entirely when all internal uses are fixed.
-  // In the meantime, use friendship so the unittest can still test them.
-  static void* GetDisableChecksStart();
-  static void DisableChecksToHereFrom(const void* start_address);
-  static void DisableChecksIn(const char* pattern);
-  friend void RangeDisabledLeaks();
-  friend void NamedTwoDisabledLeaks();
-  friend void* RunNamedDisabledLeaks(void*);
-  friend void TestHeapLeakCheckerNamedDisabling();
-
-  // Actually implements IgnoreObject().
-  static void DoIgnoreObject(const void* ptr);
-
-  // Disable checks based on stack trace entry at a depth <=
-  // max_depth.  Used to hide allocations done inside some special
-  // libraries.
-  static void DisableChecksFromToLocked(const void* start_address,
-                                        const void* end_address,
-                                        int max_depth);
-
-  // Helper for DoNoLeaks to ignore all objects reachable from all live data
-  static void IgnoreAllLiveObjectsLocked(const void* self_stack_top);
-
-  // Callback we pass to TCMalloc_ListAllProcessThreads (see thread_lister.h)
-  // that is invoked when all threads of our process are found and stopped.
-  // The call back does the things needed to ignore live data reachable from
-  // thread stacks and registers for all our threads
-  // as well as do other global-live-data ignoring
-  // (via IgnoreNonThreadLiveObjectsLocked)
-  // during the quiet state of all threads being stopped.
-  // For the argument meaning see the comment by TCMalloc_ListAllProcessThreads.
-  // Here we only use num_threads and thread_pids, that TCMalloc_ListAllProcessThreads
-  // fills for us with the number and pids of all the threads of our process
-  // it found and attached to.
-  static int IgnoreLiveThreadsLocked(void* parameter,
-                                     int num_threads,
-                                     pid_t* thread_pids,
-                                     va_list ap);
-
-  // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // that we prefer to execute from IgnoreLiveThreadsLocked
-  // while all threads are stopped.
-  // This helper does live object discovery and ignoring
-  // for all objects that are reachable from everything
-  // not related to thread stacks and registers.
-  static void IgnoreNonThreadLiveObjectsLocked();
-
-  // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // to discover and ignore all heap objects
-  // reachable from currently considered live objects
-  // (live_objects static global variable in out .cc file).
-  // "name", "name2" are two strings that we print one after another
-  // in a debug message to describe what kind of live object sources
-  // are being used.
-  static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
-
-  // Do the overall whole-program heap leak check if needed;
-  // returns true when did the leak check.
-  static bool DoMainHeapCheck();
-
-  // Type of task for UseProcMapsLocked
-  enum ProcMapsTask {
-    RECORD_GLOBAL_DATA,
-    DISABLE_LIBRARY_ALLOCS
-  };
-
-  // Success/Error Return codes for UseProcMapsLocked.
-  enum ProcMapsResult {
-    PROC_MAPS_USED,
-    CANT_OPEN_PROC_MAPS,
-    NO_SHARED_LIBS_IN_PROC_MAPS
-  };
-
-  // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
-  static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task);
-
-  // A ProcMapsTask to disable allocations from 'library'
-  // that is mapped to [start_address..end_address)
-  // (only if library is a certain system library).
-  static void DisableLibraryAllocsLocked(const char* library,
-                                         uintptr_t start_address,
-                                         uintptr_t end_address);
-
-  // Return true iff "*ptr" points to a heap object
-  // ("*ptr" can point at the start or inside of a heap object
-  //  so that this works e.g. for pointers to C++ arrays, C++ strings,
-  //  multiple-inherited objects, or pointers to members).
-  // We also fill *object_size for this object then
-  // and we move "*ptr" to point to the very start of the heap object.
-  static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
-
-  // Helper to shutdown heap leak checker when it's not needed
-  // or can't function properly.
-  static void TurnItselfOffLocked();
-
-  // Internally-used c-tor to start whole-executable checking.
-  HeapLeakChecker();
-
-  // ----------------------------------------------------------------------- //
-  // Friends and externally accessed helpers.
-
-  // Helper for VerifyHeapProfileTableStackGet in the unittest
-  // to get the recorded allocation caller for ptr,
-  // which must be a heap object.
-  static const void* GetAllocCaller(void* ptr);
-  friend void VerifyHeapProfileTableStackGet();
-
-  // This gets to execute before constructors for all global objects
-  static void BeforeConstructorsLocked();
-  friend void HeapLeakChecker_BeforeConstructors();
-
-  // This gets to execute after destructors for all global objects
-  friend void HeapLeakChecker_AfterDestructors();
-
-  // Full starting of recommended whole-program checking.
-  friend void HeapLeakChecker_InternalInitStart();
-
-  // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
-  // calls DoMainHeapCheck
-  friend void HeapLeakChecker_RunHeapCleanups();
-
-  // ----------------------------------------------------------------------- //
-  // Member data.
-
-  class SpinLock* lock_;  // to make HeapLeakChecker objects thread-safe
-  const char* name_;  // our remembered name (we own it)
-                      // NULL means this leak checker is a noop
-
-  // Snapshot taken when the checker was created.  May be NULL
-  // for the global heap checker object.  We use void* instead of
-  // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h.
-  void* start_snapshot_;
-
-  bool has_checked_;  // if we have done the leak check, so these are ready:
-  ssize_t inuse_bytes_increase_;  // bytes-in-use increase for this checker
-  ssize_t inuse_allocs_increase_;  // allocations-in-use increase
-                                   // for this checker
-  bool keep_profiles_;  // iff we should keep the heap profiles we've made
-
-  // ----------------------------------------------------------------------- //
-
-  // Disallow "evil" constructors.
-  HeapLeakChecker(const HeapLeakChecker&);
-  void operator=(const HeapLeakChecker&);
-};
-
-
-// Holds a pointer that will not be traversed by the heap checker.
-// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and
-// all objects reachable from o are ignored by the heap checker.
-template <class T>
-class HiddenPointer {
- public:
-  explicit HiddenPointer(T* t)
-      : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) {
-  }
-  // Returns unhidden pointer.  Be careful where you save the result.
-  T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); }
-
- private:
-  // Arbitrary value, but not such that xor'ing with it is likely
-  // to map one valid pointer to another valid pointer:
-  static const uintptr_t kHideMask =
-      static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll);
-  uintptr_t masked_t_;
-};
-
-// A class that exists solely to run its destructor.  This class should not be
-// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below.
-class PERFTOOLS_DLL_DECL HeapCleaner {
- public:
-  typedef void (*void_function)(void);
-  HeapCleaner(void_function f);
-  static void RunHeapCleanups();
- private:
-  static std::vector<void_function>* heap_cleanups_;
-};
-
-// A macro to declare module heap check cleanup tasks
-// (they run only if we are doing heap leak checking.)
-// 'body' should be the cleanup code to run.  'name' doesn't matter,
-// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls.
-#define REGISTER_HEAPCHECK_CLEANUP(name, body)  \
-  namespace { \
-  void heapcheck_cleanup_##name() { body; } \
-  static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \
-  }
-
-#endif  // BASE_HEAP_CHECKER_H_
diff --git a/contrib/libtcmalloc/include/gperftools/heap-profiler.h b/contrib/libtcmalloc/include/gperftools/heap-profiler.h
deleted file mode 100644
index 9b673645747..00000000000
--- a/contrib/libtcmalloc/include/gperftools/heap-profiler.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for heap-profiling.
- *
- * For full(er) information, see doc/heapprofile.html
- *
- * This module can be linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "HEAPPROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "HeapProfilerStart(filename)" and "HeapProfilerStop()".
- *
- */
-
-#ifndef BASE_HEAP_PROFILER_H_
-#define BASE_HEAP_PROFILER_H_
-
-#include <stddef.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Start profiling and arrange to write profile data to file names
- * of the form: "prefix.0000", "prefix.0001", ...
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
-
-/* Returns non-zero if we are currently profiling the heap.  (Returns
- * an int rather than a bool so it's usable from C.)  This is true
- * between calls to HeapProfilerStart() and HeapProfilerStop(), and
- * also if the program has been run with HEAPPROFILER, or some other
- * way to turn on whole-program profiling.
- */
-int IsHeapProfilerRunning();
-
-/* Stop heap profiling.  Can be restarted again with HeapProfilerStart(),
- * but the currently accumulated profiling information will be cleared.
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStop();
-
-/* Dump a profile now - can be used for dumping at a hopefully
- * quiescent state in your program, in order to more easily track down
- * memory leaks. Will include the reason in the logged message
- */
-PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason);
-
-/* Generate current heap profiling information.
- * Returns an empty string when heap profiling is not active.
- * The returned pointer is a '\0'-terminated string allocated using malloc()
- * and should be free()-ed as soon as the caller does not need it anymore.
- */
-PERFTOOLS_DLL_DECL char* GetHeapProfile();
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  /* BASE_HEAP_PROFILER_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_extension.h b/contrib/libtcmalloc/include/gperftools/malloc_extension.h
deleted file mode 100644
index 689b5f17cef..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_extension.h
+++ /dev/null
@@ -1,434 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Extra extensions exported by some malloc implementations.  These
-// extensions are accessed through a virtual base class so an
-// application can link against a malloc that does not implement these
-// extensions, and it will get default versions that do nothing.
-//
-// NOTE FOR C USERS: If you wish to use this functionality from within
-// a C program, see malloc_extension_c.h.
-
-#ifndef BASE_MALLOC_EXTENSION_H_
-#define BASE_MALLOC_EXTENSION_H_
-
-#include <stddef.h>
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>
-#endif
-#include <string>
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-static const int kMallocHistogramSize = 64;
-
-// One day, we could support other types of writers (perhaps for C?)
-typedef std::string MallocExtensionWriter;
-
-namespace base {
-struct MallocRange;
-}
-
-// Interface to a pluggable system allocator.
-class PERFTOOLS_DLL_DECL SysAllocator {
- public:
-  SysAllocator() {
-  }
-  virtual ~SysAllocator();
-
-  // Allocates "size"-byte of memory from system aligned with "alignment".
-  // Returns NULL if failed. Otherwise, the returned pointer p up to and
-  // including (p + actual_size -1) have been allocated.
-  virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0;
-};
-
-// The default implementations of the following routines do nothing.
-// All implementations should be thread-safe; the current one
-// (TCMallocImplementation) is.
-class PERFTOOLS_DLL_DECL MallocExtension {
- public:
-  virtual ~MallocExtension();
-
-  // Call this very early in the program execution -- say, in a global
-  // constructor -- to set up parameters and state needed by all
-  // instrumented malloc implemenatations.  One example: this routine
-  // sets environemnt variables to tell STL to use libc's malloc()
-  // instead of doing its own memory management.  This is safe to call
-  // multiple times, as long as each time is before threads start up.
-  static void Initialize();
-
-  // See "verify_memory.h" to see what these routines do
-  virtual bool VerifyAllMemory();
-  virtual bool VerifyNewMemory(const void* p);
-  virtual bool VerifyArrayNewMemory(const void* p);
-  virtual bool VerifyMallocMemory(const void* p);
-  virtual bool MallocMemoryStats(int* blocks, size_t* total,
-                                 int histogram[kMallocHistogramSize]);
-
-  // Get a human readable description of the following malloc data structures.
-  // - Total inuse memory by application.
-  // - Free memory(thread, central and page heap),
-  // - Freelist of central cache, each class.
-  // - Page heap freelist.
-  // The state is stored as a null-terminated string
-  // in a prefix of "buffer[0,buffer_length-1]".
-  // REQUIRES: buffer_length > 0.
-  virtual void GetStats(char* buffer, int buffer_length);
-
-  // Outputs to "writer" a sample of live objects and the stack traces
-  // that allocated these objects.  The format of the returned output
-  // is equivalent to the output of the heap profiler and can
-  // therefore be passed to "pprof". This function is equivalent to
-  // ReadStackTraces. The main difference is that this function returns
-  // serialized data appropriately formatted for use by the pprof tool.
-  // NOTE: by default, tcmalloc does not do any heap sampling, and this
-  //       function will always return an empty sample.  To get useful
-  //       data from GetHeapSample, you must also set the environment
-  //       variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288.
-  virtual void GetHeapSample(MallocExtensionWriter* writer);
-
-  // Outputs to "writer" the stack traces that caused growth in the
-  // address space size.  The format of the returned output is
-  // equivalent to the output of the heap profiler and can therefore
-  // be passed to "pprof". This function is equivalent to
-  // ReadHeapGrowthStackTraces. The main difference is that this function
-  // returns serialized data appropriately formatted for use by the
-  // pprof tool.  (This does not depend on, or require,
-  // TCMALLOC_SAMPLE_PARAMETER.)
-  virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
-
-  // Invokes func(arg, range) for every controlled memory
-  // range.  *range is filled in with information about the range.
-  //
-  // This is a best-effort interface useful only for performance
-  // analysis.  The implementation may not call func at all.
-  typedef void (RangeFunction)(void*, const base::MallocRange*);
-  virtual void Ranges(void* arg, RangeFunction func);
-
-  // -------------------------------------------------------------------
-  // Control operations for getting and setting malloc implementation
-  // specific parameters.  Some currently useful properties:
-  //
-  // generic
-  // -------
-  // "generic.current_allocated_bytes"
-  //      Number of bytes currently allocated by application
-  //      This property is not writable.
-  //
-  // "generic.heap_size"
-  //      Number of bytes in the heap ==
-  //            current_allocated_bytes +
-  //            fragmentation +
-  //            freed memory regions
-  //      This property is not writable.
-  //
-  // tcmalloc
-  // --------
-  // "tcmalloc.max_total_thread_cache_bytes"
-  //      Upper limit on total number of bytes stored across all
-  //      per-thread caches.  Default: 16MB.
-  //
-  // "tcmalloc.current_total_thread_cache_bytes"
-  //      Number of bytes used across all thread caches.
-  //      This property is not writable.
-  //
-  // "tcmalloc.central_cache_free_bytes"
-  //      Number of free bytes in the central cache that have been
-  //      assigned to size classes. They always count towards virtual
-  //      memory usage, and unless the underlying memory is swapped out
-  //      by the OS, they also count towards physical memory usage.
-  //      This property is not writable.
-  //
-  // "tcmalloc.transfer_cache_free_bytes"
-  //      Number of free bytes that are waiting to be transfered between
-  //      the central cache and a thread cache. They always count
-  //      towards virtual memory usage, and unless the underlying memory
-  //      is swapped out by the OS, they also count towards physical
-  //      memory usage. This property is not writable.
-  //
-  // "tcmalloc.thread_cache_free_bytes"
-  //      Number of free bytes in thread caches. They always count
-  //      towards virtual memory usage, and unless the underlying memory
-  //      is swapped out by the OS, they also count towards physical
-  //      memory usage. This property is not writable.
-  //
-  // "tcmalloc.pageheap_free_bytes"
-  //      Number of bytes in free, mapped pages in page heap.  These
-  //      bytes can be used to fulfill allocation requests.  They
-  //      always count towards virtual memory usage, and unless the
-  //      underlying memory is swapped out by the OS, they also count
-  //      towards physical memory usage.  This property is not writable.
-  //
-  // "tcmalloc.pageheap_unmapped_bytes"
-  //        Number of bytes in free, unmapped pages in page heap.
-  //        These are bytes that have been released back to the OS,
-  //        possibly by one of the MallocExtension "Release" calls.
-  //        They can be used to fulfill allocation requests, but
-  //        typically incur a page fault.  They always count towards
-  //        virtual memory usage, and depending on the OS, typically
-  //        do not count towards physical memory usage.  This property
-  //        is not writable.
-  // -------------------------------------------------------------------
-
-  // Get the named "property"'s value.  Returns true if the property
-  // is known.  Returns false if the property is not a valid property
-  // name for the current malloc implementation.
-  // REQUIRES: property != NULL; value != NULL
-  virtual bool GetNumericProperty(const char* property, size_t* value);
-
-  // Set the named "property"'s value.  Returns true if the property
-  // is known and writable.  Returns false if the property is not a
-  // valid property name for the current malloc implementation, or
-  // is not writable.
-  // REQUIRES: property != NULL
-  virtual bool SetNumericProperty(const char* property, size_t value);
-
-  // Mark the current thread as "idle".  This routine may optionally
-  // be called by threads as a hint to the malloc implementation that
-  // any thread-specific resources should be released.  Note: this may
-  // be an expensive routine, so it should not be called too often.
-  //
-  // Also, if the code that calls this routine will go to sleep for
-  // a while, it should take care to not allocate anything between
-  // the call to this routine and the beginning of the sleep.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadIdle();
-
-  // Mark the current thread as "busy".  This routine should be
-  // called after MarkThreadIdle() if the thread will now do more
-  // work.  If this method is not called, performance may suffer.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadBusy();
-
-  // Gets the system allocator used by the malloc extension instance. Returns
-  // NULL for malloc implementations that do not support pluggable system
-  // allocators.
-  virtual SysAllocator* GetSystemAllocator();
-
-  // Sets the system allocator to the specified.
-  //
-  // Users could register their own system allocators for malloc implementation
-  // that supports pluggable system allocators, such as TCMalloc, by doing:
-  //   alloc = new MyOwnSysAllocator();
-  //   MallocExtension::instance()->SetSystemAllocator(alloc);
-  // It's up to users whether to fall back (recommended) to the default
-  // system allocator (use GetSystemAllocator() above) or not. The caller is
-  // responsible to any necessary locking.
-  // See tcmalloc/system-alloc.h for the interface and
-  //     tcmalloc/memfs_malloc.cc for the examples.
-  //
-  // It's a no-op for malloc implementations that do not support pluggable
-  // system allocators.
-  virtual void SetSystemAllocator(SysAllocator *a);
-
-  // Try to release num_bytes of free memory back to the operating
-  // system for reuse.  Use this extension with caution -- to get this
-  // memory back may require faulting pages back in by the OS, and
-  // that may be slow.  (Currently only implemented in tcmalloc.)
-  virtual void ReleaseToSystem(size_t num_bytes);
-
-  // Same as ReleaseToSystem() but release as much memory as possible.
-  virtual void ReleaseFreeMemory();
-
-  // Sets the rate at which we release unused memory to the system.
-  // Zero means we never release memory back to the system.  Increase
-  // this flag to return memory faster; decrease it to return memory
-  // slower.  Reasonable rates are in the range [0,10].  (Currently
-  // only implemented in tcmalloc).
-  virtual void SetMemoryReleaseRate(double rate);
-
-  // Gets the release rate.  Returns a value < 0 if unknown.
-  virtual double GetMemoryReleaseRate();
-
-  // Returns the estimated number of bytes that will be allocated for
-  // a request of "size" bytes.  This is an estimate: an allocation of
-  // SIZE bytes may reserve more bytes, but will never reserve less.
-  // (Currently only implemented in tcmalloc, other implementations
-  // always return SIZE.)
-  // This is equivalent to malloc_good_size() in OS X.
-  virtual size_t GetEstimatedAllocatedSize(size_t size);
-
-  // Returns the actual number N of bytes reserved by tcmalloc for the
-  // pointer p.  The client is allowed to use the range of bytes
-  // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this
-  // allocation).  This number may be equal to or greater than the number
-  // of bytes requested when p was allocated.
-  // p must have been allocated by this malloc implementation,
-  // must not be an interior pointer -- that is, must be exactly
-  // the pointer returned to by malloc() et al., not some offset
-  // from that -- and should not have been freed yet.  p may be NULL.
-  // (Currently only implemented in tcmalloc; other implementations
-  // will return 0.)
-  // This is equivalent to malloc_size() in OS X, malloc_usable_size()
-  // in glibc, and _msize() for windows.
-  virtual size_t GetAllocatedSize(const void* p);
-
-  // Returns kOwned if this malloc implementation allocated the memory
-  // pointed to by p, or kNotOwned if some other malloc implementation
-  // allocated it or p is NULL.  May also return kUnknownOwnership if
-  // the malloc implementation does not keep track of ownership.
-  // REQUIRES: p must be a value returned from a previous call to
-  // malloc(), calloc(), realloc(), memalign(), posix_memalign(),
-  // valloc(), pvalloc(), new, or new[], and must refer to memory that
-  // is currently allocated (so, for instance, you should not pass in
-  // a pointer after having called free() on it).
-  enum Ownership {
-    // NOTE: Enum values MUST be kept in sync with the version in
-    // malloc_extension_c.h
-    kUnknownOwnership = 0,
-    kOwned,
-    kNotOwned
-  };
-  virtual Ownership GetOwnership(const void* p);
-
-  // The current malloc implementation.  Always non-NULL.
-  static MallocExtension* instance();
-
-  // Change the malloc implementation.  Typically called by the
-  // malloc implementation during initialization.
-  static void Register(MallocExtension* implementation);
-
-  // Returns detailed information about malloc's freelists. For each list,
-  // return a FreeListInfo:
-  struct FreeListInfo {
-    size_t min_object_size;
-    size_t max_object_size;
-    size_t total_bytes_free;
-    const char* type;
-  };
-  // Each item in the vector refers to a different freelist. The lists
-  // are identified by the range of allocations that objects in the
-  // list can satisfy ([min_object_size, max_object_size]) and the
-  // type of freelist (see below). The current size of the list is
-  // returned in total_bytes_free (which count against a processes
-  // resident and virtual size).
-  //
-  // Currently supported types are:
-  //
-  // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size
-  //          class in the page heap is returned. Bytes in "page_unmapped"
-  //          are no longer backed by physical memory and do not count against
-  //          the resident size of a process.
-  //
-  // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
-  //          than the largest page heap size class. Only one "large"
-  //          entry is returned. There is no upper-bound on the size
-  //          of objects in the large free list; this call returns
-  //          kint64max for max_object_size.  Bytes in
-  //          "large_unmapped" are no longer backed by physical memory
-  //          and do not count against the resident size of a process.
-  //
-  // "tcmalloc.central" - tcmalloc's central free-list. One entry per
-  //          size-class is returned. Never unmapped.
-  //
-  // "debug.free_queue" - free objects queued by the debug allocator
-  //                      and not returned to tcmalloc.
-  //
-  // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped.
-  virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
-
-  // Get a list of stack traces of sampled allocation points.  Returns
-  // a pointer to a "new[]-ed" result array, and stores the sample
-  // period in "sample_period".
-  //
-  // The state is stored as a sequence of adjacent entries
-  // in the returned array.  Each entry has the following form:
-  //    uintptr_t count;        // Number of objects with following trace
-  //    uintptr_t size;         // Total size of objects with following trace
-  //    uintptr_t depth;        // Number of PC values in stack trace
-  //    void*     stack[depth]; // PC values that form the stack trace
-  //
-  // The list of entries is terminated by a "count" of 0.
-  //
-  // It is the responsibility of the caller to "delete[]" the returned array.
-  //
-  // May return NULL to indicate no results.
-  //
-  // This is an internal extension.  Callers should use the more
-  // convenient "GetHeapSample(string*)" method defined above.
-  virtual void** ReadStackTraces(int* sample_period);
-
-  // Like ReadStackTraces(), but returns stack traces that caused growth
-  // in the address space size.
-  virtual void** ReadHeapGrowthStackTraces();
-
-  // Returns the size in bytes of the calling threads cache.
-  virtual size_t GetThreadCacheSize();
-
-  // Like MarkThreadIdle, but does not destroy the internal data
-  // structures of the thread cache. When the thread resumes, it wil
-  // have an empty cache but will not need to pay to reconstruct the
-  // cache data structures.
-  virtual void MarkThreadTemporarilyIdle();
-};
-
-namespace base {
-
-// Information passed per range.  More fields may be added later.
-struct MallocRange {
-  enum Type {
-    INUSE,                // Application is using this range
-    FREE,                 // Range is currently free
-    UNMAPPED,             // Backing physical memory has been returned to the OS
-    UNKNOWN
-    // More enum values may be added in the future
-  };
-
-  uintptr_t address;    // Address of range
-  size_t length;        // Byte length of range
-  Type type;            // Type of this range
-  double fraction;      // Fraction of range that is being used (0 if !INUSE)
-
-  // Perhaps add the following:
-  // - stack trace if this range was sampled
-  // - heap growth stack trace if applicable to this range
-  // - age when allocated (for inuse) or freed (if not in use)
-};
-
-} // namespace base
-
-#endif  // BASE_MALLOC_EXTENSION_H_
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h b/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h
deleted file mode 100644
index 70ff6868ecf..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_extension_c.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_extension.h.  See malloc_extension.h for
- * details.  Note these C shims always work on
- * MallocExtension::instance(); it is not possible to have more than
- * one MallocExtension object in C applications.
- */
-
-#ifndef _MALLOC_EXTENSION_C_H_
-#define _MALLOC_EXTENSION_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows -- makes sure clients can import these fns */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define kMallocExtensionHistogramSize 64
-
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total,
-                                      int histogram[kMallocExtensionHistogramSize]);
-PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length);
-
-/* TODO(csilvers): write a C version of these routines, that perhaps
- * takes a function ptr and a void *.
- */
-/* void MallocExtension_GetHeapSample(string* result); */
-/* void MallocExtension_GetHeapGrowthStacks(string* result); */
-
-PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value);
-PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetThreadCacheSize(void);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadTemporarilyIdle(void);
-
-/*
- * NOTE: These enum values MUST be kept in sync with the version in
- *       malloc_extension.h
- */
-typedef enum {
-  MallocExtension_kUnknownOwnership = 0,
-  MallocExtension_kOwned,
-  MallocExtension_kNotOwned
-} MallocExtension_Ownership;
-
-PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p);
-
-#ifdef __cplusplus
-}   /* extern "C" */
-#endif
-
-#endif /* _MALLOC_EXTENSION_C_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_hook.h b/contrib/libtcmalloc/include/gperftools/malloc_hook.h
deleted file mode 100644
index b76411fb590..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_hook.h
+++ /dev/null
@@ -1,359 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Some of our malloc implementations can invoke the following hooks whenever
-// memory is allocated or deallocated.  MallocHook is thread-safe, and things
-// you do before calling AddFooHook(MyHook) are visible to any resulting calls
-// to MyHook.  Hooks must be thread-safe.  If you write:
-//
-//   CHECK(MallocHook::AddNewHook(&MyNewHook));
-//
-// MyNewHook will be invoked in subsequent calls in the current thread, but
-// there are no guarantees on when it might be invoked in other threads.
-//
-// There are a limited number of slots available for each hook type.  Add*Hook
-// will return false if there are no slots available.  Remove*Hook will return
-// false if the given hook was not already installed.
-//
-// The order in which individual hooks are called in Invoke*Hook is undefined.
-//
-// It is safe for a hook to remove itself within Invoke*Hook and add other
-// hooks.  Any hooks added inside a hook invocation (for the same hook type)
-// will not be invoked for the current invocation.
-//
-// One important user of these hooks is the heap profiler.
-//
-// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be
-// directly in the code of the (de)allocation function that is provided to the
-// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute.
-//
-// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h.  If you
-// need to invoke a hook (which you shouldn't unless you're part of tcmalloc),
-// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h.
-//
-// NOTE FOR C USERS: If you want to use malloc_hook functionality from
-// a C program, #include malloc_hook_c.h instead of this file.
-
-#ifndef _MALLOC_HOOK_H_
-#define _MALLOC_HOOK_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-extern "C" {
-#include "malloc_hook_c.h"  // a C version of the malloc_hook interface
-}
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-// The C++ methods below call the C version (MallocHook_*), and thus
-// convert between an int and a bool.  Windows complains about this
-// (a "performance warning") which we don't care about, so we suppress.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4800)
-#endif
-
-// Note: malloc_hook_c.h defines MallocHook_*Hook and
-// MallocHook_{Add,Remove}*Hook.  The version of these inside the MallocHook
-// class are defined in terms of the malloc_hook_c version.  See malloc_hook_c.h
-// for details of these types/functions.
-
-class PERFTOOLS_DLL_DECL MallocHook {
- public:
-  // The NewHook is invoked whenever an object is allocated.
-  // It may be passed NULL if the allocator returned NULL.
-  typedef MallocHook_NewHook NewHook;
-  inline static bool AddNewHook(NewHook hook) {
-    return MallocHook_AddNewHook(hook);
-  }
-  inline static bool RemoveNewHook(NewHook hook) {
-    return MallocHook_RemoveNewHook(hook);
-  }
-  inline static void InvokeNewHook(const void* p, size_t s);
-
-  // The DeleteHook is invoked whenever an object is deallocated.
-  // It may be passed NULL if the caller is trying to delete NULL.
-  typedef MallocHook_DeleteHook DeleteHook;
-  inline static bool AddDeleteHook(DeleteHook hook) {
-    return MallocHook_AddDeleteHook(hook);
-  }
-  inline static bool RemoveDeleteHook(DeleteHook hook) {
-    return MallocHook_RemoveDeleteHook(hook);
-  }
-  inline static void InvokeDeleteHook(const void* p);
-
-  // The PreMmapHook is invoked with mmap or mmap64 arguments just
-  // before the call is actually made.  Such a hook may be useful
-  // in memory limited contexts, to catch allocations that will exceed
-  // a memory limit, and take outside actions to increase that limit.
-  typedef MallocHook_PreMmapHook PreMmapHook;
-  inline static bool AddPreMmapHook(PreMmapHook hook) {
-    return MallocHook_AddPreMmapHook(hook);
-  }
-  inline static bool RemovePreMmapHook(PreMmapHook hook) {
-    return MallocHook_RemovePreMmapHook(hook);
-  }
-  inline static void InvokePreMmapHook(const void* start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset);
-
-  // The MmapReplacement is invoked after the PreMmapHook but before
-  // the call is actually made. The MmapReplacement should return true
-  // if it handled the call, or false if it is still necessary to
-  // call mmap/mmap64.
-  // This should be used only by experts, and users must be be
-  // extremely careful to avoid recursive calls to mmap. The replacement
-  // should be async signal safe.
-  // Only one MmapReplacement is supported. After setting an MmapReplacement
-  // you must call RemoveMmapReplacement before calling SetMmapReplacement
-  // again.
-  typedef MallocHook_MmapReplacement MmapReplacement;
-  inline static bool SetMmapReplacement(MmapReplacement hook) {
-    return MallocHook_SetMmapReplacement(hook);
-  }
-  inline static bool RemoveMmapReplacement(MmapReplacement hook) {
-    return MallocHook_RemoveMmapReplacement(hook);
-  }
-  inline static bool InvokeMmapReplacement(const void* start,
-                                           size_t size,
-                                           int protection,
-                                           int flags,
-                                           int fd,
-                                           off_t offset,
-                                           void** result);
-
-
-  // The MmapHook is invoked whenever a region of memory is mapped.
-  // It may be passed MAP_FAILED if the mmap failed.
-  typedef MallocHook_MmapHook MmapHook;
-  inline static bool AddMmapHook(MmapHook hook) {
-    return MallocHook_AddMmapHook(hook);
-  }
-  inline static bool RemoveMmapHook(MmapHook hook) {
-    return MallocHook_RemoveMmapHook(hook);
-  }
-  inline static void InvokeMmapHook(const void* result,
-                                    const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-
-  // The MunmapReplacement is invoked with munmap arguments just before
-  // the call is actually made. The MunmapReplacement should return true
-  // if it handled the call, or false if it is still necessary to
-  // call munmap.
-  // This should be used only by experts. The replacement should be
-  // async signal safe.
-  // Only one MunmapReplacement is supported. After setting an
-  // MunmapReplacement you must call RemoveMunmapReplacement before
-  // calling SetMunmapReplacement again.
-  typedef MallocHook_MunmapReplacement MunmapReplacement;
-  inline static bool SetMunmapReplacement(MunmapReplacement hook) {
-    return MallocHook_SetMunmapReplacement(hook);
-  }
-  inline static bool RemoveMunmapReplacement(MunmapReplacement hook) {
-    return MallocHook_RemoveMunmapReplacement(hook);
-  }
-  inline static bool InvokeMunmapReplacement(const void* p,
-                                             size_t size,
-                                             int* result);
-
-  // The MunmapHook is invoked whenever a region of memory is unmapped.
-  typedef MallocHook_MunmapHook MunmapHook;
-  inline static bool AddMunmapHook(MunmapHook hook) {
-    return MallocHook_AddMunmapHook(hook);
-  }
-  inline static bool RemoveMunmapHook(MunmapHook hook) {
-    return MallocHook_RemoveMunmapHook(hook);
-  }
-  inline static void InvokeMunmapHook(const void* p, size_t size);
-
-  // The MremapHook is invoked whenever a region of memory is remapped.
-  typedef MallocHook_MremapHook MremapHook;
-  inline static bool AddMremapHook(MremapHook hook) {
-    return MallocHook_AddMremapHook(hook);
-  }
-  inline static bool RemoveMremapHook(MremapHook hook) {
-    return MallocHook_RemoveMremapHook(hook);
-  }
-  inline static void InvokeMremapHook(const void* result,
-                                      const void* old_addr,
-                                      size_t old_size,
-                                      size_t new_size,
-                                      int flags,
-                                      const void* new_addr);
-
-  // The PreSbrkHook is invoked just before sbrk is called -- except when
-  // the increment is 0.  This is because sbrk(0) is often called
-  // to get the top of the memory stack, and is not actually a
-  // memory-allocation call.  It may be useful in memory-limited contexts,
-  // to catch allocations that will exceed the limit and take outside
-  // actions to increase such a limit.
-  typedef MallocHook_PreSbrkHook PreSbrkHook;
-  inline static bool AddPreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_AddPreSbrkHook(hook);
-  }
-  inline static bool RemovePreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_RemovePreSbrkHook(hook);
-  }
-  inline static void InvokePreSbrkHook(ptrdiff_t increment);
-
-  // The SbrkHook is invoked whenever sbrk is called -- except when
-  // the increment is 0.  This is because sbrk(0) is often called
-  // to get the top of the memory stack, and is not actually a
-  // memory-allocation call.
-  typedef MallocHook_SbrkHook SbrkHook;
-  inline static bool AddSbrkHook(SbrkHook hook) {
-    return MallocHook_AddSbrkHook(hook);
-  }
-  inline static bool RemoveSbrkHook(SbrkHook hook) {
-    return MallocHook_RemoveSbrkHook(hook);
-  }
-  inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment);
-
-  // Get the current stack trace.  Try to skip all routines up to and
-  // and including the caller of MallocHook::Invoke*.
-  // Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
-  // as a hint about how many routines to skip if better information
-  // is not available.
-  inline static int GetCallerStackTrace(void** result, int max_depth,
-                                        int skip_count) {
-    return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
-  }
-
-  // Unhooked versions of mmap() and munmap().   These should be used
-  // only by experts, since they bypass heapchecking, etc.
-  // Note: These do not run hooks, but they still use the MmapReplacement
-  // and MunmapReplacement.
-  static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
-                            int fd, off_t offset);
-  static int UnhookedMUnmap(void *start, size_t length);
-
-  // The following are DEPRECATED.
-  inline static NewHook GetNewHook();
-  inline static NewHook SetNewHook(NewHook hook) {
-    return MallocHook_SetNewHook(hook);
-  }
-
-  inline static DeleteHook GetDeleteHook();
-  inline static DeleteHook SetDeleteHook(DeleteHook hook) {
-    return MallocHook_SetDeleteHook(hook);
-  }
-
-  inline static PreMmapHook GetPreMmapHook();
-  inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) {
-    return MallocHook_SetPreMmapHook(hook);
-  }
-
-  inline static MmapHook GetMmapHook();
-  inline static MmapHook SetMmapHook(MmapHook hook) {
-    return MallocHook_SetMmapHook(hook);
-  }
-
-  inline static MunmapHook GetMunmapHook();
-  inline static MunmapHook SetMunmapHook(MunmapHook hook) {
-    return MallocHook_SetMunmapHook(hook);
-  }
-
-  inline static MremapHook GetMremapHook();
-  inline static MremapHook SetMremapHook(MremapHook hook) {
-    return MallocHook_SetMremapHook(hook);
-  }
-
-  inline static PreSbrkHook GetPreSbrkHook();
-  inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_SetPreSbrkHook(hook);
-  }
-
-  inline static SbrkHook GetSbrkHook();
-  inline static SbrkHook SetSbrkHook(SbrkHook hook) {
-    return MallocHook_SetSbrkHook(hook);
-  }
-  // End of DEPRECATED methods.
-
- private:
-  // Slow path versions of Invoke*Hook.
-  static void InvokeNewHookSlow(const void* p, size_t s);
-  static void InvokeDeleteHookSlow(const void* p);
-  static void InvokePreMmapHookSlow(const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-  static void InvokeMmapHookSlow(const void* result,
-                                 const void* start,
-                                 size_t size,
-                                 int protection,
-                                 int flags,
-                                 int fd,
-                                 off_t offset);
-  static bool InvokeMmapReplacementSlow(const void* start,
-                                        size_t size,
-                                        int protection,
-                                        int flags,
-                                        int fd,
-                                        off_t offset,
-                                        void** result);
-  static void InvokeMunmapHookSlow(const void* p, size_t size);
-  static bool InvokeMunmapReplacementSlow(const void* p,
-                                          size_t size,
-                                          int* result);
-  static void InvokeMremapHookSlow(const void* result,
-                                   const void* old_addr,
-                                   size_t old_size,
-                                   size_t new_size,
-                                   int flags,
-                                   const void* new_addr);
-  static void InvokePreSbrkHookSlow(ptrdiff_t increment);
-  static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment);
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-
-#endif /* _MALLOC_HOOK_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h b/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h
deleted file mode 100644
index 56337e15e83..00000000000
--- a/contrib/libtcmalloc/include/gperftools/malloc_hook_c.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_hook.h.  See malloc_hook.h for details
- * on how to use these.
- */
-
-#ifndef _MALLOC_HOOK_C_H_
-#define _MALLOC_HOOK_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Get the current stack trace.  Try to skip all routines up to and
- * and including the caller of MallocHook::Invoke*.
- * Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
- * as a hint about how many routines to skip if better information
- * is not available.
- */
-PERFTOOLS_DLL_DECL
-int MallocHook_GetCallerStackTrace(void** result, int max_depth,
-                                   int skip_count);
-
-/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on
- * failure.
- */
-
-typedef void (*MallocHook_NewHook)(const void* ptr, size_t size);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddNewHook(MallocHook_NewHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveNewHook(MallocHook_NewHook hook);
-
-typedef void (*MallocHook_DeleteHook)(const void* ptr);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook);
-
-typedef void (*MallocHook_PreMmapHook)(const void *start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook);
-
-typedef void (*MallocHook_MmapHook)(const void* result,
-                                    const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMmapHook(MallocHook_MmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook);
-
-typedef int (*MallocHook_MmapReplacement)(const void* start,
-                                          size_t size,
-                                          int protection,
-                                          int flags,
-                                          int fd,
-                                          off_t offset,
-                                          void** result);
-int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook);
-int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook);
-
-typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook);
-
-typedef int (*MallocHook_MunmapReplacement)(const void* ptr,
-                                            size_t size,
-                                            int* result);
-int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook);
-int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook);
-
-typedef void (*MallocHook_MremapHook)(const void* result,
-                                      const void* old_addr,
-                                      size_t old_size,
-                                      size_t new_size,
-                                      int flags,
-                                      const void* new_addr);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMremapHook(MallocHook_MremapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook);
-
-typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook);
-
-typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook);
-
-/* The following are DEPRECATED. */
-PERFTOOLS_DLL_DECL
-MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook);
-/* End of DEPRECATED functions. */
-
-#ifdef __cplusplus
-}   // extern "C"
-#endif
-
-#endif /* _MALLOC_HOOK_C_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/profiler.h b/contrib/libtcmalloc/include/gperftools/profiler.h
deleted file mode 100644
index 2d272d616a9..00000000000
--- a/contrib/libtcmalloc/include/gperftools/profiler.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for CPU profiling based on periodic pc-sampling.
- *
- * For full(er) information, see doc/cpuprofile.html
- *
- * This module is linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "CPUPROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "ProfilerStart(filename)" and "ProfilerStop()".
- *
- *
- * (Note: if using linux 2.4 or earlier, only the main thread may be
- * profiled.)
- *
- * Use pprof to view the resulting profile output.
- *    % pprof <path_to_executable> <profile_file_name>
- *    % pprof --gv  <path_to_executable> <profile_file_name>
- *
- * These functions are thread-safe.
- */
-
-#ifndef BASE_PROFILER_H_
-#define BASE_PROFILER_H_
-
-#include <time.h>       /* For time_t */
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Profiler options, for use with ProfilerStartWithOptions.  To use:
- *
- *   struct ProfilerOptions options;
- *   memset(&options, 0, sizeof options);
- *
- * then fill in fields as needed.
- *
- * This structure is intended to be usable from C code, so no constructor
- * is provided to initialize it.  (Use memset as described above).
- */
-struct ProfilerOptions {
-  /* Filter function and argument.
-   *
-   * If filter_in_thread is not NULL, when a profiling tick is delivered
-   * the profiler will call:
-   *
-   *   (*filter_in_thread)(filter_in_thread_arg)
-   *
-   * If it returns nonzero, the sample will be included in the profile.
-   * Note that filter_in_thread runs in a signal handler, so must be
-   * async-signal-safe.
-   *
-   * A typical use would be to set up filter results for each thread
-   * in the system before starting the profiler, then to make
-   * filter_in_thread be a very simple function which retrieves those
-   * results in an async-signal-safe way.  Retrieval could be done
-   * using thread-specific data, or using a shared data structure that
-   * supports async-signal-safe lookups.
-   */
-  int (*filter_in_thread)(void *arg);
-  void *filter_in_thread_arg;
-};
-
-/* Start profiling and write profile info into fname, discarding any
- * existing profiling data in that file.
- *
- * This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
- */
-PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
-
-/* Start profiling and write profile into fname, discarding any
- * existing profiling data in that file.
- *
- * The profiler is configured using the options given by 'options'.
- * Options which are not specified are given default values.
- *
- * 'options' may be NULL, in which case all are given default values.
- *
- * Returns nonzero if profiling was started successfully, or zero else.
- */
-PERFTOOLS_DLL_DECL int ProfilerStartWithOptions(
-    const char *fname, const struct ProfilerOptions *options);
-
-/* Stop profiling. Can be started again with ProfilerStart(), but
- * the currently accumulated profiling data will be cleared.
- */
-PERFTOOLS_DLL_DECL void ProfilerStop(void);
-
-/* Flush any currently buffered profiling state to the profile file.
- * Has no effect if the profiler has not been started.
- */
-PERFTOOLS_DLL_DECL void ProfilerFlush(void);
-
-
-/* DEPRECATED: these functions were used to enable/disable profiling
- * in the current thread, but no longer do anything.
- */
-PERFTOOLS_DLL_DECL void ProfilerEnable(void);
-PERFTOOLS_DLL_DECL void ProfilerDisable(void);
-
-/* Returns nonzero if profile is currently enabled, zero if it's not. */
-PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(void);
-
-/* Routine for registering new threads with the profiler.
- */
-PERFTOOLS_DLL_DECL void ProfilerRegisterThread(void);
-
-/* Stores state about profiler's current status into "*state". */
-struct ProfilerState {
-  int    enabled;             /* Is profiling currently enabled? */
-  time_t start_time;          /* If enabled, when was profiling started? */
-  char   profile_name[1024];  /* Name of profile file being written, or '\0' */
-  int    samples_gathered;    /* Number of samples gathered so far (or 0) */
-};
-PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  /* BASE_PROFILER_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/stacktrace.h b/contrib/libtcmalloc/include/gperftools/stacktrace.h
deleted file mode 100644
index 2b9c5a13209..00000000000
--- a/contrib/libtcmalloc/include/gperftools/stacktrace.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Routines to extract the current stack trace.  These functions are
-// thread-safe.
-
-#ifndef GOOGLE_STACKTRACE_H_
-#define GOOGLE_STACKTRACE_H_
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-
-// Skips the most recent "skip_count" stack frames (also skips the
-// frame generated for the "GetStackFrames" routine itself), and then
-// records the pc values for up to the next "max_depth" frames in
-// "result", and the corresponding stack frame sizes in "sizes".
-// Returns the number of values recorded in "result"/"sizes".
-//
-// Example:
-//      main() { foo(); }
-//      foo() { bar(); }
-//      bar() {
-//        void* result[10];
-//        int sizes[10];
-//        int depth = GetStackFrames(result, sizes, 10, 1);
-//      }
-//
-// The GetStackFrames call will skip the frame for "bar".  It will
-// return 2 and will produce pc values that map to the following
-// procedures:
-//      result[0]       foo
-//      result[1]       main
-// (Actually, there may be a few more entries after "main" to account for
-// startup procedures.)
-// And corresponding stack frame sizes will also be recorded:
-//    sizes[0]       16
-//    sizes[1]       16
-// (Stack frame sizes of 16 above are just for illustration purposes.)
-// Stack frame sizes of 0 or less indicate that those frame sizes couldn't
-// be identified.
-//
-// This routine may return fewer stack frame entries than are
-// available. Also note that "result" and "sizes" must both be non-NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
-                          int skip_count);
-
-// Same as above, but to be used from a signal handler. The "uc" parameter
-// should be the pointer to ucontext_t which was passed as the 3rd parameter
-// to sa_sigaction signal handler. It may help the unwinder to get a
-// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
-                                     int skip_count, const void *uc);
-
-// This is similar to the GetStackFrames routine, except that it returns
-// the stack trace only, and not the stack frame sizes as well.
-// Example:
-//      main() { foo(); }
-//      foo() { bar(); }
-//      bar() {
-//        void* result[10];
-//        int depth = GetStackTrace(result, 10, 1);
-//      }
-//
-// This produces:
-//      result[0]       foo
-//      result[1]       main
-//           ....       ...
-//
-// "result" must not be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
-                                            int skip_count);
-
-// Same as above, but to be used from a signal handler. The "uc" parameter
-// should be the pointer to ucontext_t which was passed as the 3rd parameter
-// to sa_sigaction signal handler. It may help the unwinder to get a
-// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
-                                    int skip_count, const void *uc);
-
-#endif /* GOOGLE_STACKTRACE_H_ */
diff --git a/contrib/libtcmalloc/include/gperftools/tcmalloc.h b/contrib/libtcmalloc/include/gperftools/tcmalloc.h
deleted file mode 100644
index a5b39dbffbe..00000000000
--- a/contrib/libtcmalloc/include/gperftools/tcmalloc.h
+++ /dev/null
@@ -1,160 +0,0 @@
-// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2003, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat <opensource@google.com>
- *         .h file by Craig Silverstein <opensource@google.com>
- */
-
-#ifndef TCMALLOC_TCMALLOC_H_
-#define TCMALLOC_TCMALLOC_H_
-
-#include <stddef.h>                     /* for size_t */
-
-/* Define the version number so folks can check against it */
-#define TC_VERSION_MAJOR  2
-#define TC_VERSION_MINOR  5
-#define TC_VERSION_PATCH  ""
-#define TC_VERSION_STRING "gperftools 2.5"
-
-/* For struct mallinfo, if it's defined. */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-# include <malloc.h>
-#else
-struct mallinfo {
-   size_t arena;    /* non-mmapped space allocated from system */
-   size_t ordblks;  /* number of free chunks */
-   size_t smblks;   /* always 0 */
-   size_t hblks;    /* always 0 */
-   size_t hblkhd;   /* space in mmapped regions */
-   size_t usmblks;  /* maximum total allocated space */
-   size_t fsmblks;  /* always 0 */
-   size_t uordblks; /* total allocated space */
-   size_t fordblks; /* total free space */
-   size_t keepcost; /* releasable (via malloc_trim) space */
-};
-#endif
-
-#ifdef __cplusplus
-#define PERFTOOLS_THROW throw()
-#else
-# ifdef __GNUC__
-#  define PERFTOOLS_THROW __attribute__((__nothrow__))
-# else
-#  define PERFTOOLS_THROW
-# endif
-#endif
-
-#ifndef PERFTOOLS_DLL_DECL
-#define PERFTOOLS_DLL_DECL_DEFINED
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-namespace std {
-struct nothrow_t;
-}
-
-extern "C" {
-#endif
-  /*
-   * Returns a human-readable version string.  If major, minor,
-   * and/or patch are not NULL, they are set to the major version,
-   * minor version, and patch-code (a string, usually "").
-   */
-  PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor,
-                                            const char** patch) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment,
-                                       size_t __size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr,
-                                           size_t align, size_t size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW;
-
-  PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW;
-#if 1
-  PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW;
-#endif
-
-  /*
-   * This is an alias for MallocExtension::instance()->GetAllocatedSize().
-   * It is equivalent to
-   *    OS X: malloc_size()
-   *    glibc: malloc_usable_size()
-   *    Windows: _msize()
-   */
-  PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW;
-
-#ifdef __cplusplus
-  PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_new(size_t size);
-  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
-                                          const std::nothrow_t&) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw();
-  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
-                                            const std::nothrow_t&) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
-  PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
-                                               const std::nothrow_t&) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW;
-  PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw();
-  PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
-                                                 const std::nothrow_t&) PERFTOOLS_THROW;
-}
-#endif
-
-/* We're only un-defining those for public */
-#if !defined(GPERFTOOLS_CONFIG_H_)
-
-#undef PERFTOOLS_THROW
-
-#ifdef PERFTOOLS_DLL_DECL_DEFINED
-#undef PERFTOOLS_DLL_DECL
-#undef PERFTOOLS_DLL_DECL_DEFINED
-#endif
-
-#endif /* GPERFTOOLS_CONFIG_H_ */
-
-#endif  /* #ifndef TCMALLOC_TCMALLOC_H_ */
diff --git a/contrib/libtcmalloc/src/addressmap-inl.h b/contrib/libtcmalloc/src/addressmap-inl.h
deleted file mode 100644
index fd1dc5b6ffe..00000000000
--- a/contrib/libtcmalloc/src/addressmap-inl.h
+++ /dev/null
@@ -1,422 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// A fast map from addresses to values.  Assumes that addresses are
-// clustered.  The main use is intended to be for heap-profiling.
-// May be too memory-hungry for other uses.
-//
-// We use a user-defined allocator/de-allocator so that we can use
-// this data structure during heap-profiling.
-//
-// IMPLEMENTATION DETAIL:
-//
-// Some default definitions/parameters:
-//  * Block      -- aligned 128-byte region of the address space
-//  * Cluster    -- aligned 1-MB region of the address space
-//  * Block-ID   -- block-number within a cluster
-//  * Cluster-ID -- Starting address of cluster divided by cluster size
-//
-// We use a three-level map to represent the state:
-//  1. A hash-table maps from a cluster-ID to the data for that cluster.
-//  2. For each non-empty cluster we keep an array indexed by
-//     block-ID tht points to the first entry in the linked-list
-//     for the block.
-//  3. At the bottom, we keep a singly-linked list of all
-//     entries in a block (for non-empty blocks).
-//
-//    hash table
-//  +-------------+
-//  | id->cluster |---> ...
-//  |     ...     |
-//  | id->cluster |--->  Cluster
-//  +-------------+     +-------+    Data for one block
-//                      |  nil  |   +------------------------------------+
-//                      |   ----+---|->[addr/value]-->[addr/value]-->... |
-//                      |  nil  |   +------------------------------------+
-//                      |   ----+--> ...
-//                      |  nil  |
-//                      |  ...  |
-//                      +-------+
-//
-// Note that we require zero-bytes of overhead for completely empty
-// clusters.  The minimum space requirement for a cluster is the size
-// of the hash-table entry plus a pointer value for each block in
-// the cluster.  Empty blocks impose no extra space requirement.
-//
-// The cost of a lookup is:
-//      a. A hash-table lookup to find the cluster
-//      b. An array access in the cluster structure
-//      c. A traversal over the linked-list for a block
-
-#ifndef BASE_ADDRESSMAP_INL_H_
-#define BASE_ADDRESSMAP_INL_H_
-
-#include "config.h"
-#include <stddef.h>
-#include <string.h>
-#if defined HAVE_STDINT_H
-#include <stdint.h>             // to get uint16_t (ISO naming madness)
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>           // another place uint16_t might be defined
-#else
-#include <sys/types.h>          // our last best hope
-#endif
-
-// This class is thread-unsafe -- that is, instances of this class can
-// not be accessed concurrently by multiple threads -- because the
-// callback function for Iterate() may mutate contained values. If the
-// callback functions you pass do not mutate their Value* argument,
-// AddressMap can be treated as thread-compatible -- that is, it's
-// safe for multiple threads to call "const" methods on this class,
-// but not safe for one thread to call const methods on this class
-// while another thread is calling non-const methods on the class.
-template <class Value>
-class AddressMap {
- public:
-  typedef void* (*Allocator)(size_t size);
-  typedef void  (*DeAllocator)(void* ptr);
-  typedef const void* Key;
-
-  // Create an AddressMap that uses the specified allocator/deallocator.
-  // The allocator/deallocator should behave like malloc/free.
-  // For instance, the allocator does not need to return initialized memory.
-  AddressMap(Allocator alloc, DeAllocator dealloc);
-  ~AddressMap();
-
-  // If the map contains an entry for "key", return it. Else return NULL.
-  inline const Value* Find(Key key) const;
-  inline Value* FindMutable(Key key);
-
-  // Insert <key,value> into the map.  Any old value associated
-  // with key is forgotten.
-  void Insert(Key key, Value value);
-
-  // Remove any entry for key in the map.  If an entry was found
-  // and removed, stores the associated value in "*removed_value"
-  // and returns true.  Else returns false.
-  bool FindAndRemove(Key key, Value* removed_value);
-
-  // Similar to Find but we assume that keys are addresses of non-overlapping
-  // memory ranges whose sizes are given by size_func.
-  // If the map contains a range into which "key" points
-  // (at its start or inside of it, but not at the end),
-  // return the address of the associated value
-  // and store its key in "*res_key".
-  // Else return NULL.
-  // max_size specifies largest range size possibly in existence now.
-  typedef size_t (*ValueSizeFunc)(const Value& v);
-  const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
-                          Key key, Key* res_key);
-
-  // Iterate over the address map calling 'callback'
-  // for all stored key-value pairs and passing 'arg' to it.
-  // We don't use full Closure/Callback machinery not to add
-  // unnecessary dependencies to this class with low-level uses.
-  template<class Type>
-  inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
-
- private:
-  typedef uintptr_t Number;
-
-  // The implementation assumes that addresses inserted into the map
-  // will be clustered.  We take advantage of this fact by splitting
-  // up the address-space into blocks and using a linked-list entry
-  // for each block.
-
-  // Size of each block.  There is one linked-list for each block, so
-  // do not make the block-size too big.  Oterwise, a lot of time
-  // will be spent traversing linked lists.
-  static const int kBlockBits = 7;
-  static const int kBlockSize = 1 << kBlockBits;
-
-  // Entry kept in per-block linked-list
-  struct Entry {
-    Entry* next;
-    Key    key;
-    Value  value;
-  };
-
-  // We further group a sequence of consecutive blocks into a cluster.
-  // The data for a cluster is represented as a dense array of
-  // linked-lists, one list per contained block.
-  static const int kClusterBits = 13;
-  static const Number kClusterSize = 1 << (kBlockBits + kClusterBits);
-  static const int kClusterBlocks = 1 << kClusterBits;
-
-  // We use a simple chaining hash-table to represent the clusters.
-  struct Cluster {
-    Cluster* next;                      // Next cluster in hash table chain
-    Number   id;                        // Cluster ID
-    Entry*   blocks[kClusterBlocks];    // Per-block linked-lists
-  };
-
-  // Number of hash-table entries.  With the block-size/cluster-size
-  // defined above, each cluster covers 1 MB, so an 4K entry
-  // hash-table will give an average hash-chain length of 1 for 4GB of
-  // in-use memory.
-  static const int kHashBits = 12;
-  static const int kHashSize = 1 << 12;
-
-  // Number of entry objects allocated at a time
-  static const int ALLOC_COUNT = 64;
-
-  Cluster**     hashtable_;              // The hash-table
-  Entry*        free_;                   // Free list of unused Entry objects
-
-  // Multiplicative hash function:
-  // The value "kHashMultiplier" is the bottom 32 bits of
-  //    int((sqrt(5)-1)/2 * 2^32)
-  // This is a good multiplier as suggested in CLR, Knuth.  The hash
-  // value is taken to be the top "k" bits of the bottom 32 bits
-  // of the muliplied value.
-  static const uint32_t kHashMultiplier = 2654435769u;
-  static int HashInt(Number x) {
-    // Multiply by a constant and take the top bits of the result.
-    const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier;
-    return static_cast<int>(m >> (32 - kHashBits));
-  }
-
-  // Find cluster object for specified address.  If not found
-  // and "create" is true, create the object.  If not found
-  // and "create" is false, return NULL.
-  //
-  // This method is bitwise-const if create is false.
-  Cluster* FindCluster(Number address, bool create) {
-    // Look in hashtable
-    const Number cluster_id = address >> (kBlockBits + kClusterBits);
-    const int h = HashInt(cluster_id);
-    for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
-      if (c->id == cluster_id) {
-        return c;
-      }
-    }
-
-    // Create cluster if necessary
-    if (create) {
-      Cluster* c = New<Cluster>(1);
-      c->id = cluster_id;
-      c->next = hashtable_[h];
-      hashtable_[h] = c;
-      return c;
-    }
-    return NULL;
-  }
-
-  // Return the block ID for an address within its cluster
-  static int BlockID(Number address) {
-    return (address >> kBlockBits) & (kClusterBlocks - 1);
-  }
-
-  //--------------------------------------------------------------
-  // Memory management -- we keep all objects we allocate linked
-  // together in a singly linked list so we can get rid of them
-  // when we are all done.  Furthermore, we allow the client to
-  // pass in custom memory allocator/deallocator routines.
-  //--------------------------------------------------------------
-  struct Object {
-    Object* next;
-    // The real data starts here
-  };
-
-  Allocator     alloc_;                 // The allocator
-  DeAllocator   dealloc_;               // The deallocator
-  Object*       allocated_;             // List of allocated objects
-
-  // Allocates a zeroed array of T with length "num".  Also inserts
-  // the allocated block into a linked list so it can be deallocated
-  // when we are all done.
-  template <class T> T* New(int num) {
-    void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T));
-    memset(ptr, 0, sizeof(Object) + num*sizeof(T));
-    Object* obj = reinterpret_cast<Object*>(ptr);
-    obj->next = allocated_;
-    allocated_ = obj;
-    return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1);
-  }
-};
-
-// More implementation details follow:
-
-template <class Value>
-AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc)
-  : free_(NULL),
-    alloc_(alloc),
-    dealloc_(dealloc),
-    allocated_(NULL) {
-  hashtable_ = New<Cluster*>(kHashSize);
-}
-
-template <class Value>
-AddressMap<Value>::~AddressMap() {
-  // De-allocate all of the objects we allocated
-  for (Object* obj = allocated_; obj != NULL; /**/) {
-    Object* next = obj->next;
-    (*dealloc_)(obj);
-    obj = next;
-  }
-}
-
-template <class Value>
-inline const Value* AddressMap<Value>::Find(Key key) const {
-  return const_cast<AddressMap*>(this)->FindMutable(key);
-}
-
-template <class Value>
-inline Value* AddressMap<Value>::FindMutable(Key key) {
-  const Number num = reinterpret_cast<Number>(key);
-  const Cluster* const c = FindCluster(num, false/*do not create*/);
-  if (c != NULL) {
-    for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
-      if (e->key == key) {
-        return &e->value;
-      }
-    }
-  }
-  return NULL;
-}
-
-template <class Value>
-void AddressMap<Value>::Insert(Key key, Value value) {
-  const Number num = reinterpret_cast<Number>(key);
-  Cluster* const c = FindCluster(num, true/*create*/);
-
-  // Look in linked-list for this block
-  const int block = BlockID(num);
-  for (Entry* e = c->blocks[block]; e != NULL; e = e->next) {
-    if (e->key == key) {
-      e->value = value;
-      return;
-    }
-  }
-
-  // Create entry
-  if (free_ == NULL) {
-    // Allocate a new batch of entries and add to free-list
-    Entry* array = New<Entry>(ALLOC_COUNT);
-    for (int i = 0; i < ALLOC_COUNT-1; i++) {
-      array[i].next = &array[i+1];
-    }
-    array[ALLOC_COUNT-1].next = free_;
-    free_ = &array[0];
-  }
-  Entry* e = free_;
-  free_ = e->next;
-  e->key = key;
-  e->value = value;
-  e->next = c->blocks[block];
-  c->blocks[block] = e;
-}
-
-template <class Value>
-bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
-  const Number num = reinterpret_cast<Number>(key);
-  Cluster* const c = FindCluster(num, false/*do not create*/);
-  if (c != NULL) {
-    for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) {
-      Entry* e = *p;
-      if (e->key == key) {
-        *removed_value = e->value;
-        *p = e->next;         // Remove e from linked-list
-        e->next = free_;      // Add e to free-list
-        free_ = e;
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-template <class Value>
-const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func,
-                                           size_t max_size,
-                                           Key key,
-                                           Key* res_key) {
-  const Number key_num = reinterpret_cast<Number>(key);
-  Number num = key_num;  // we'll move this to move back through the clusters
-  while (1) {
-    const Cluster* c = FindCluster(num, false/*do not create*/);
-    if (c != NULL) {
-      while (1) {
-        const int block = BlockID(num);
-        bool had_smaller_key = false;
-        for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) {
-          const Number e_num = reinterpret_cast<Number>(e->key);
-          if (e_num <= key_num) {
-            if (e_num == key_num  ||  // to handle 0-sized ranges
-                key_num < e_num + (*size_func)(e->value)) {
-              *res_key = e->key;
-              return &e->value;
-            }
-            had_smaller_key = true;
-          }
-        }
-        if (had_smaller_key) return NULL;  // got a range before 'key'
-                                           // and it did not contain 'key'
-        if (block == 0) break;
-        // try address-wise previous block
-        num |= kBlockSize - 1;  // start at the last addr of prev block
-        num -= kBlockSize;
-        if (key_num - num > max_size) return NULL;
-      }
-    }
-    if (num < kClusterSize) return NULL;  // first cluster
-    // go to address-wise previous cluster to try
-    num |= kClusterSize - 1;  // start at the last block of previous cluster
-    num -= kClusterSize;
-    if (key_num - num > max_size) return NULL;
-      // Having max_size to limit the search is crucial: else
-      // we have to traverse a lot of empty clusters (or blocks).
-      // We can avoid needing max_size if we put clusters into
-      // a search tree, but performance suffers considerably
-      // if we use this approach by using stl::set.
-  }
-}
-
-template <class Value>
-template <class Type>
-inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
-                                       Type arg) const {
-  // We could optimize this by traversing only non-empty clusters and/or blocks
-  // but it does not speed up heap-checker noticeably.
-  for (int h = 0; h < kHashSize; ++h) {
-    for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
-      for (int b = 0; b < kClusterBlocks; ++b) {
-        for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
-          callback(e->key, &e->value, arg);
-        }
-      }
-    }
-  }
-}
-
-#endif  // BASE_ADDRESSMAP_INL_H_
diff --git a/contrib/libtcmalloc/src/base/arm_instruction_set_select.h b/contrib/libtcmalloc/src/base/arm_instruction_set_select.h
deleted file mode 100644
index 6fde685272c..00000000000
--- a/contrib/libtcmalloc/src/base/arm_instruction_set_select.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: Alexander Levitskiy
-//
-// Generalizes the plethora of ARM flavors available to an easier to manage set
-// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto
-
-#ifndef ARM_INSTRUCTION_SET_SELECT_H_
-#define ARM_INSTRUCTION_SET_SELECT_H_
-
-#if defined(__ARM_ARCH_8A__)
-# define ARMV8 1
-#endif
-
-#if defined(ARMV8) || \
-    defined(__ARM_ARCH_7__) || \
-    defined(__ARM_ARCH_7R__) || \
-    defined(__ARM_ARCH_7A__)
-# define ARMV7 1
-#endif
-
-#if defined(ARMV7) || \
-    defined(__ARM_ARCH_6__) || \
-    defined(__ARM_ARCH_6J__) || \
-    defined(__ARM_ARCH_6K__) || \
-    defined(__ARM_ARCH_6Z__) || \
-    defined(__ARM_ARCH_6T2__) || \
-    defined(__ARM_ARCH_6ZK__)
-# define ARMV6 1
-#endif
-
-#if defined(ARMV6) || \
-    defined(__ARM_ARCH_5T__) || \
-    defined(__ARM_ARCH_5E__) || \
-    defined(__ARM_ARCH_5TE__) || \
-    defined(__ARM_ARCH_5TEJ__)
-# define ARMV5 1
-#endif
-
-#if defined(ARMV5) || \
-    defined(__ARM_ARCH_4__) || \
-    defined(__ARM_ARCH_4T__)
-# define ARMV4 1
-#endif
-
-#if defined(ARMV4) || \
-    defined(__ARM_ARCH_3__) || \
-    defined(__ARM_ARCH_3M__)
-# define ARMV3 1
-#endif
-
-#if defined(ARMV3) || \
-    defined(__ARM_ARCH_2__)
-# define ARMV2 1
-#endif
-
-#endif  // ARM_INSTRUCTION_SET_SELECT_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h b/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h
deleted file mode 100644
index d0f941309bb..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-arm-generic.h
+++ /dev/null
@@ -1,228 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2003, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// ---
-//
-// Author: Lei Zhang, Sasha Levitskiy
-//
-// This file is an internal atomic implementation, use base/atomicops.h instead.
-//
-// LinuxKernelCmpxchg is from Google Gears.
-
-#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
-#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 0xffff0fc0 is the hard coded address of a function provided by
-// the kernel which implements an atomic compare-exchange. On older
-// ARM architecture revisions (pre-v6) this may be implemented using
-// a syscall. This address is stable, and in active use (hard coded)
-// by at least glibc-2.7 and the Android C library.
-// pLinuxKernelCmpxchg has both acquire and release barrier sematincs.
-typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
-                                           Atomic32 new_value,
-                                           volatile Atomic32* ptr);
-LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK =
-    (LinuxKernelCmpxchgFunc) 0xffff0fc0;
-
-typedef void (*LinuxKernelMemoryBarrierFunc)(void);
-LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
-    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
-
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value = *ptr;
-  do {
-    if (!pLinuxKernelCmpxchg(old_value, new_value,
-                             const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (pLinuxKernelCmpxchg(old_value, new_value,
-                               const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void MemoryBarrier() {
-  pLinuxKernelMemoryBarrier();
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-
-// 64-bit versions are not implemented yet.
-
-inline void NotImplementedFatalError(const char *function_name) {
-  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
-          function_name);
-  abort();
-}
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_CompareAndSwap");
-  return 0;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_AtomicExchange");
-  return 0;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("NoBarrier_Store");
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("Acquire_Store64");
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("Release_Store");
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("NoBarrier_Load");
-  return 0;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("Atomic64 Acquire_Load");
-  return 0;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("Atomic64 Release_Load");
-  return 0;
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap");
-  return 0;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  NotImplementedFatalError("Atomic64 Release_CompareAndSwap");
-  return 0;
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h b/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h
deleted file mode 100644
index 35f10481b04..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-arm-v6plus.h
+++ /dev/null
@@ -1,330 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// ---
-//
-// Author: Sasha Levitskiy
-// based on atomicops-internals by Sanjay Ghemawat
-//
-// This file is an internal atomic implementation, use base/atomicops.h instead.
-//
-// This code implements ARM atomics for architectures V6 and  newer.
-
-#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
-#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"  // For COMPILE_ASSERT
-
-// The LDREXD and STREXD instructions in ARM all v7 variants or above.  In v6,
-// only some variants support it.  For simplicity, we only use exclusive
-// 64-bit load/store in V7 or above.
-#if defined(ARMV7)
-# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
-#endif
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 32-bit low-level ops
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 oldval, res;
-  do {
-    __asm__ __volatile__(
-    "ldrex   %1, [%3]\n"
-    "mov     %0, #0\n"
-    "teq     %1, %4\n"
-    // The following IT (if-then) instruction is needed for the subsequent
-    // conditional instruction STREXEQ when compiling in THUMB mode.
-    // In ARM mode, the compiler/assembler will not generate any code for it.
-    "it      eq\n"
-    "strexeq %0, %5, [%3]\n"
-        : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
-        : "r" (ptr), "Ir" (old_value), "r" (new_value)
-        : "cc");
-  } while (res);
-  return oldval;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  Atomic32 tmp, old;
-  __asm__ __volatile__(
-      "1:\n"
-      "ldrex  %1, [%2]\n"
-      "strex  %0, %3, [%2]\n"
-      "teq    %0, #0\n"
-      "bne    1b"
-      : "=&r" (tmp), "=&r" (old)
-      : "r" (ptr), "r" (new_value)
-      : "cc", "memory");
-  return old;
-}
-
-inline void MemoryBarrier() {
-#if !defined(ARMV7)
-  uint32_t dest = 0;
-  __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
-#else
-  __asm__ __volatile__("dmb" : : : "memory");
-#endif
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
-  MemoryBarrier();
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  MemoryBarrier();
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  MemoryBarrier();
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit versions are only available if LDREXD and STREXD instructions
-// are available.
-#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
-
-#define BASE_HAS_ATOMIC64 1
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 oldval, res;
-  do {
-    __asm__ __volatile__(
-    "ldrexd   %1, [%3]\n"
-    "mov      %0, #0\n"
-    "teq      %Q1, %Q4\n"
-    // The following IT (if-then) instructions are needed for the subsequent
-    // conditional instructions when compiling in THUMB mode.
-    // In ARM mode, the compiler/assembler will not generate any code for it.
-    "it       eq\n"
-    "teqeq    %R1, %R4\n"
-    "it       eq\n"
-    "strexdeq %0, %5, [%3]\n"
-        : "=&r" (res), "=&r" (oldval), "+Q" (*ptr)
-        : "r" (ptr), "Ir" (old_value), "r" (new_value)
-        : "cc");
-  } while (res);
-  return oldval;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  int store_failed;
-  Atomic64 old;
-  __asm__ __volatile__(
-      "1:\n"
-      "ldrexd  %1, [%2]\n"
-      "strexd  %0, %3, [%2]\n"
-      "teq     %0, #0\n"
-      "bne     1b"
-      : "=&r" (store_failed), "=&r" (old)
-      : "r" (ptr), "r" (new_value)
-      : "cc", "memory");
-  return old;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
-  MemoryBarrier();
-  return old_value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  MemoryBarrier();
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  int store_failed;
-  Atomic64 dummy;
-  __asm__ __volatile__(
-      "1:\n"
-      // Dummy load to lock cache line.
-      "ldrexd  %1, [%3]\n"
-      "strexd  %0, %2, [%3]\n"
-      "teq     %0, #0\n"
-      "bne     1b"
-      : "=&r" (store_failed), "=&r"(dummy)
-      : "r"(value), "r" (ptr)
-      : "cc", "memory");
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  Atomic64 res;
-  __asm__ __volatile__(
-  "ldrexd   %0, [%1]\n"
-  "clrex\n"
-      : "=r" (res)
-      : "r"(ptr), "Q"(*ptr));
-  return res;
-}
-
-#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
-
-inline void NotImplementedFatalError(const char *function_name) {
-  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
-          function_name);
-  abort();
-}
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_CompareAndSwap");
-  return 0;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_AtomicExchange");
-  return 0;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  NotImplementedFatalError("Acquire_AtomicExchange");
-  return 0;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  NotImplementedFatalError("Release_AtomicExchange");
-  return 0;
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("NoBarrier_Store");
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("NoBarrier_Load");
-  return 0;
-}
-
-#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_Store(ptr, value);
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  MemoryBarrier();
-  NoBarrier_Store(ptr, value);
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = NoBarrier_Load(ptr);
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return NoBarrier_Load(ptr);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  MemoryBarrier();
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-}  // namespace subtle ends
-}  // namespace base ends
-
-#endif  // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h b/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h
deleted file mode 100644
index f8d27863cb7..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-gcc.h
+++ /dev/null
@@ -1,203 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2014, Linaro
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// ---
-//
-// Author: Riku Voipio, riku.voipio@linaro.org
-//
-// atomic primitives implemented with gcc atomic intrinsics:
-// http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
-//
-
-#ifndef BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
-#define BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-inline void MemoryBarrier() {
-    __sync_synchronize();
-}
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELAXED);
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value,  __ATOMIC_ACQUIRE);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELEASE);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit versions
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELAXED);
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value,  __ATOMIC_ACQUIRE);
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELEASE);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value = old_value;
-  __atomic_compare_exchange_n(ptr, &prev_value, new_value, 
-          0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
-  return prev_value;
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h b/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h
deleted file mode 100644
index b52fdf0d1ec..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-linuxppc.h
+++ /dev/null
@@ -1,437 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- */
-
-// Implementation of atomic operations for ppc-linux.  This file should not
-// be included directly.  Clients should instead include
-// "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
-#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
-
-typedef int32_t Atomic32;
-
-#ifdef __PPC64__
-#define BASE_HAS_ATOMIC64 1
-#endif
-
-namespace base {
-namespace subtle {
-
-static inline void _sync(void) {
-  __asm__ __volatile__("sync": : : "memory");
-}
-
-static inline void _lwsync(void) {
-  // gcc defines __NO_LWSYNC__ when appropriate; see
-  //    http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html
-#ifdef __NO_LWSYNC__
-  __asm__ __volatile__("msync": : : "memory");
-#else
-  __asm__ __volatile__("lwsync": : : "memory");
-#endif
-}
-
-static inline void _isync(void) {
-  __asm__ __volatile__("isync": : : "memory");
-}
-
-static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) {
-  Atomic32 t;
-  __asm__ __volatile__(
-"1:		lwarx   %0,0,%3\n\
-		add     %0,%2,%0\n\
-		stwcx.  %0,0,%3 \n\
-		bne-    1b"
-		: "=&r" (t), "+m" (*value)
-		: "r" (amount), "r" (value)
-                : "cc");
-  return t;
-}
-
-static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) {
-  Atomic32 t;
-  _lwsync();
-  t = OSAtomicAdd32(amount, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in OSAtomicAdd32) has a
-  // conditional branch with a data dependency on the update.
-  // Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value,
-                                            Atomic32 new_value,
-                                            Atomic32 *value) {
-  Atomic32 prev;
-  __asm__ __volatile__(
-"1:		lwarx   %0,0,%2\n\
-		cmpw    0,%0,%3\n\
-		bne-    2f\n\
-		stwcx.  %4,0,%2\n\
-		bne-    1b\n\
-2:"
-                : "=&r" (prev), "+m" (*value)
-                : "r" (value), "r" (old_value), "r" (new_value)
-                : "cc");
-  return prev == old_value;
-}
-
-static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value,
-                                                       Atomic32 new_value,
-                                                       Atomic32 *value) {
-  Atomic32 t;
-  t = OSAtomicCompareAndSwap32(old_value, new_value, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in
-  // OSAtomicCompareAndSwap32) has a conditional branch with a data
-  // dependency on the update.  Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value,
-                                                       Atomic32 new_value,
-                                                       Atomic32 *value) {
-  _lwsync();
-  return OSAtomicCompareAndSwap32(old_value, new_value, value);
-}
-
-typedef int64_t Atomic64;
-
-inline void MemoryBarrier() {
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-}
-
-// 32-bit Versions.
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32(old_value, new_value,
-                                 const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
-                                     const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Acquire(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Release(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-#ifdef __PPC64__
-
-// 64-bit Versions.
-
-static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) {
-  Atomic64 t;
-  __asm__ __volatile__(
-"1:		ldarx   %0,0,%3\n\
-		add     %0,%2,%0\n\
-		stdcx.  %0,0,%3 \n\
-		bne-    1b"
-		: "=&r" (t), "+m" (*value)
-		: "r" (amount), "r" (value)
-                : "cc");
-  return t;
-}
-
-static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) {
-  Atomic64 t;
-  _lwsync();
-  t = OSAtomicAdd64(amount, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in OSAtomicAdd64) has a
-  // conditional branch with a data dependency on the update.
-  // Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value,
-                                            Atomic64 new_value,
-                                            Atomic64 *value) {
-  Atomic64 prev;
-  __asm__ __volatile__(
-"1:		ldarx   %0,0,%2\n\
-		cmpd    0,%0,%3\n\
-		bne-    2f\n\
-		stdcx.  %4,0,%2\n\
-		bne-    1b\n\
-2:"
-                : "=&r" (prev), "+m" (*value)
-                : "r" (value), "r" (old_value), "r" (new_value)
-                : "cc");
-  return prev == old_value;
-}
-
-static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value,
-                                                       Atomic64 new_value,
-                                                       Atomic64 *value) {
-  Atomic64 t;
-  t = OSAtomicCompareAndSwap64(old_value, new_value, value);
-  // This is based on the code snippet in the architecture manual (Vol
-  // 2, Appendix B).  It's a little tricky: correctness depends on the
-  // fact that the code right before this (in
-  // OSAtomicCompareAndSwap64) has a conditional branch with a data
-  // dependency on the update.  Otherwise, we'd have to use sync.
-  _isync();
-  return t;
-}
-
-static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value,
-                                                       Atomic64 new_value,
-                                                       Atomic64 *value) {
-  _lwsync();
-  return OSAtomicCompareAndSwap64(old_value, new_value, value);
-}
-
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64(old_value, new_value,
-                                 const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
-                                         Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
-                                     const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Acquire(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Release(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-#endif
-
-inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-}
-
-inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  _lwsync();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
-  Atomic32 value = *ptr;
-  _lwsync();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-  return *ptr;
-}
-
-#ifdef __PPC64__
-
-// 64-bit Versions.
-
-inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  *ptr = value;
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-}
-
-inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  _lwsync();
-  *ptr = value;
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
-  Atomic64 value = *ptr;
-  _lwsync();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
-  // This can't be _lwsync(); we need to order the immediately
-  // preceding stores against any load that may follow, but lwsync
-  // doesn't guarantee that.
-  _sync();
-  return *ptr;
-}
-
-#endif
-
-}   // namespace base::subtle
-}   // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h b/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h
deleted file mode 100644
index b5130d4f4d7..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-macosx.h
+++ /dev/null
@@ -1,370 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Implementation of atomic operations for Mac OS X.  This file should not
-// be included directly.  Clients should instead include
-// "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_
-#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_
-
-typedef int32_t Atomic32;
-
-// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different
-// on the Mac, even when they are the same size.  Similarly, on __ppc64__,
-// AtomicWord and Atomic64 are always different.  Thus, we need explicit
-// casting.
-#ifdef __LP64__
-#define AtomicWordCastType base::subtle::Atomic64
-#else
-#define AtomicWordCastType Atomic32
-#endif
-
-#if defined(__LP64__) || defined(__i386__)
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-#endif
-
-#include <libkern/OSAtomic.h>
-
-namespace base {
-namespace subtle {
-
-#if !defined(__LP64__) && defined(__ppc__)
-
-// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC,
-// while the underlying assembly instructions are available only some
-// implementations of PowerPC.
-
-// The following inline functions will fail with the error message at compile
-// time ONLY IF they are called.  So it is safe to use this header if user
-// code only calls AtomicWord and Atomic32 operations.
-//
-// NOTE(vchen): Implementation notes to implement the atomic ops below may
-// be found in "PowerPC Virtual Environment Architecture, Book II,
-// Version 2.02", January 28, 2005, Appendix B, page 46.  Unfortunately,
-// extra care must be taken to ensure data are properly 8-byte aligned, and
-// that data are returned correctly according to Mac OS X ABI specs.
-
-inline int64_t OSAtomicCompareAndSwap64(
-    int64_t oldValue, int64_t newValue, int64_t *theValue) {
-  __asm__ __volatile__(
-      "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t");
-  return 0;
-}
-
-inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) {
-  __asm__ __volatile__(
-      "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t");
-  return 0;
-}
-
-inline int64_t OSAtomicCompareAndSwap64Barrier(
-    int64_t oldValue, int64_t newValue, int64_t *theValue) {
-  int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue);
-  OSMemoryBarrier();
-  return prev;
-}
-
-inline int64_t OSAtomicAdd64Barrier(
-    int64_t theAmount, int64_t *theValue) {
-  int64_t new_val = OSAtomicAdd64(theAmount, theValue);
-  OSMemoryBarrier();
-  return new_val;
-}
-#endif
-
-typedef int64_t Atomic64;
-
-inline void MemoryBarrier() {
-  OSMemoryBarrier();
-}
-
-// 32-bit Versions.
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32(old_value, new_value,
-                                 const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32(old_value, new_value,
-                                     const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
-                                            const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
-                                       Atomic32 new_value) {
-  return Acquire_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap32Barrier(old_value, new_value,
-                                        const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32 *ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit version
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64(old_value, new_value,
-                                 const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr,
-                                         Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64(old_value, new_value,
-                                     const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_value;
-  do {
-    old_value = *ptr;
-  } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
-                                            const_cast<Atomic64*>(ptr)));
-  return old_value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
-                                       Atomic64 new_value) {
-  return Acquire_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 prev_value;
-  do {
-    if (OSAtomicCompareAndSwap64Barrier(old_value, new_value,
-                                        const_cast<Atomic64*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  // The lib kern interface does not distinguish between
-  // Acquire and Release memory barriers; they are equivalent.
-  return Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-
-#ifdef __LP64__
-
-// 64-bit implementation on 64-bit platform
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
-  Atomic64 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#else
-
-// 64-bit implementation on 32-bit platform
-
-#if defined(__ppc__)
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-   __asm__ __volatile__(
-       "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t");
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-   __asm__ __volatile__(
-       "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t");
-   return 0;
-}
-
-#elif defined(__i386__)
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  __asm__ __volatile__("movq %1, %%mm0\n\t"    // Use mmx reg for 64-bit atomic
-                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
-                       "emms\n\t"              // Reset FP registers
-                       : "=m" (*ptr)
-                       : "m" (value)
-                       : // mark the FP stack and mmx registers as clobbered
-                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
-                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
-                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
-
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  Atomic64 value;
-  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
-                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
-                       "emms\n\t"            // Reset FP registers
-                       : "=m" (value)
-                       : "m" (*ptr)
-                       : // mark the FP stack and mmx registers as clobbered
-                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
-                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
-                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
-
-  return value;
-}
-#endif
-
-
-inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  NoBarrier_Store(ptr, value);
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) {
-  MemoryBarrier();
-  NoBarrier_Store(ptr, value);
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) {
-  Atomic64 value = NoBarrier_Load(ptr);
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
-  MemoryBarrier();
-  return NoBarrier_Load(ptr);
-}
-#endif  // __LP64__
-
-}   // namespace base::subtle
-}   // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_MACOSX_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-mips.h b/contrib/libtcmalloc/src/base/atomicops-internals-mips.h
deleted file mode 100644
index 4bfd7f6c70d..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-mips.h
+++ /dev/null
@@ -1,323 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2013, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Author: Jovan Zelincevic <jovan.zelincevic@imgtec.com>
-// based on atomicops-internals by Sanjay Ghemawat
-
-// This file is an internal atomic implementation, use base/atomicops.h instead.
-//
-// This code implements MIPS atomics.
-
-#ifndef BASE_ATOMICOPS_INTERNALS_MIPS_H_
-#define BASE_ATOMICOPS_INTERNALS_MIPS_H_
-
-#if (_MIPS_ISA == _MIPS_ISA_MIPS64)
-#define BASE_HAS_ATOMIC64 1
-#endif
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-// Atomically execute:
-// result = *ptr;
-// if (*ptr == old_value)
-// *ptr = new_value;
-// return result;
-//
-// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value".
-// Always return the old value of "*ptr"
-//
-// This routine implies no memory barriers.
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value)
-{
-    Atomic32 prev, tmp;
-    __asm__ volatile(
-        ".set   push                \n"
-        ".set   noreorder           \n"
-
-    "1:                             \n"
-        "ll     %0,     %5          \n" // prev = *ptr
-        "bne    %0,     %3,     2f  \n" // if (prev != old_value) goto 2
-        " move  %2,     %4          \n" // tmp = new_value
-        "sc     %2,     %1          \n" // *ptr = tmp (with atomic check)
-        "beqz   %2,     1b          \n" // start again on atomic error
-        " nop                       \n" // delay slot nop
-    "2:                             \n"
-
-        ".set   pop                 \n"
-        : "=&r" (prev), "=m" (*ptr),
-          "=&r" (tmp)
-        : "Ir" (old_value), "r" (new_value),
-          "m" (*ptr)
-        : "memory"
-    );
-    return prev;
-}
-
-// Atomically store new_value into *ptr, returning the previous value held in
-// *ptr. This routine implies no memory barriers.
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value)
-{
-    Atomic32 temp, old;
-    __asm__ volatile(
-        ".set   push                \n"
-        ".set   noreorder           \n"
-
-    "1:                             \n"
-        "ll     %1,     %2          \n" // old = *ptr
-        "move   %0,     %3          \n" // temp = new_value
-        "sc     %0,     %2          \n" // *ptr = temp (with atomic check)
-        "beqz   %0,     1b          \n" // start again on atomic error
-        " nop                       \n" // delay slot nop
-
-        ".set   pop                 \n"
-        : "=&r" (temp), "=&r" (old),
-          "=m" (*ptr)
-        : "r" (new_value), "m" (*ptr)
-        : "memory"
-    );
-    return old;
-}
-
-inline void MemoryBarrier()
-{
-    __asm__ volatile("sync" : : : "memory");
-}
-
-// "Acquire" operations
-// ensure that no later memory access can be reordered ahead of the operation.
-// "Release" operations ensure that no previous memory access can be reordered
-// after the operation. "Barrier" operations have both "Acquire" and "Release"
-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
-// access.
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value)
-{
-    Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-    MemoryBarrier();
-    return res;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value)
-{
-    MemoryBarrier();
-    Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-    return res;
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value)
-{
-    *ptr = value;
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value)
-{
-    Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
-    MemoryBarrier();
-    return old_value;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value)
-{
-    MemoryBarrier();
-    return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value)
-{
-    *ptr = value;
-    MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value)
-{
-    MemoryBarrier();
-    *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr)
-{
-    return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr)
-{
-    Atomic32 value = *ptr;
-    MemoryBarrier();
-    return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr)
-{
-    MemoryBarrier();
-    return *ptr;
-}
-
-#if (_MIPS_ISA == _MIPS_ISA_MIPS64) || (_MIPS_SIM == _MIPS_SIM_ABI64)
-
-typedef int64_t Atomic64;
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value)
-{
-    Atomic64 prev, tmp;
-    __asm__ volatile(
-        ".set   push                \n"
-        ".set   noreorder           \n"
-
-    "1:                             \n"
-        "lld    %0,     %5          \n" // prev = *ptr
-        "bne    %0,     %3,     2f  \n" // if (prev != old_value) goto 2
-        " move  %2,     %4          \n" // tmp = new_value
-        "scd    %2,     %1          \n" // *ptr = tmp (with atomic check)
-        "beqz   %2,     1b          \n" // start again on atomic error
-        " nop                       \n" // delay slot nop
-    "2:                             \n"
-
-        ".set   pop                 \n"
-        : "=&r" (prev), "=m" (*ptr),
-          "=&r" (tmp)
-        : "Ir" (old_value), "r" (new_value),
-          "m" (*ptr)
-        : "memory"
-    );
-    return prev;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value)
-{
-    Atomic64 temp, old;
-    __asm__ volatile(
-        ".set   push                \n"
-        ".set   noreorder           \n"
-
-    "1:                             \n"
-        "lld    %1,     %2          \n" // old = *ptr
-        "move   %0,     %3          \n" // temp = new_value
-        "scd    %0,     %2          \n" // *ptr = temp (with atomic check)
-        "beqz   %0,     1b          \n" // start again on atomic error
-        " nop                       \n" // delay slot nop
-
-        ".set   pop                 \n"
-        : "=&r" (temp), "=&r" (old),
-          "=m" (*ptr)
-        : "r" (new_value), "m" (*ptr)
-        : "memory"
-    );
-    return old;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value)
-{
-    Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
-    MemoryBarrier();
-    return old_value;
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value)
-{
-    Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-    MemoryBarrier();
-    return res;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value)
-{
-    MemoryBarrier();
-    Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-    return res;
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value)
-{
-    *ptr = value;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value)
-{
-    MemoryBarrier();
-    return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value)
-{
-    *ptr = value;
-    MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value)
-{
-    MemoryBarrier();
-    *ptr = value;
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr)
-{
-    return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr)
-{
-    Atomic64 value = *ptr;
-    MemoryBarrier();
-    return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr)
-{
-    MemoryBarrier();
-    return *ptr;
-}
-
-#endif
-
-}   // namespace base::subtle
-}   // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_MIPS_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-windows.h b/contrib/libtcmalloc/src/base/atomicops-internals-windows.h
deleted file mode 100644
index 93ced8770d4..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-windows.h
+++ /dev/null
@@ -1,457 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// Implementation of atomic operations using Windows API
-// functions.  This file should not be included directly.  Clients
-// should instead include "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
-#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "base/basictypes.h"  // For COMPILE_ASSERT
-
-typedef int32 Atomic32;
-
-#if defined(_WIN64)
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-#endif
-
-namespace base {
-namespace subtle {
-
-typedef int64 Atomic64;
-
-// 32-bit low-level operations on any platform
-
-extern "C" {
-// We use windows intrinsics when we can (they seem to be supported
-// well on MSVC 8.0 and above).  Unfortunately, in some
-// environments, <windows.h> and <intrin.h> have conflicting
-// declarations of some other intrinsics, breaking compilation:
-//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
-// Therefore, we simply declare the relevant intrinsics ourself.
-
-// MinGW has a bug in the header files where it doesn't indicate the
-// first argument is volatile -- they're not up to date.  See
-//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
-// We have to const_cast away the volatile to avoid compiler warnings.
-// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
-#if defined(__MINGW32__)
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
-}
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
-}
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
-}
-
-#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
-// Unfortunately, in some environments, <windows.h> and <intrin.h>
-// have conflicting declarations of some intrinsics, breaking
-// compilation.  So we declare the intrinsics we need ourselves.  See
-//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
-LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
-#pragma intrinsic(_InterlockedCompareExchange)
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return _InterlockedCompareExchange(ptr, newval, oldval);
-}
-
-LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
-#pragma intrinsic(_InterlockedExchange)
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return _InterlockedExchange(ptr, newval);
-}
-
-LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
-#pragma intrinsic(_InterlockedExchangeAdd)
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return _InterlockedExchangeAdd(ptr, increment);
-}
-
-#else
-inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
-                                           LONG newval, LONG oldval) {
-  return ::InterlockedCompareExchange(ptr, newval, oldval);
-}
-inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
-  return ::InterlockedExchange(ptr, newval);
-}
-inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
-  return ::InterlockedExchangeAdd(ptr, increment);
-}
-
-#endif  // ifdef __MINGW32__
-}  // extern "C"
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  LONG result = FastInterlockedCompareExchange(
-      reinterpret_cast<volatile LONG*>(ptr),
-      static_cast<LONG>(new_value),
-      static_cast<LONG>(old_value));
-  return static_cast<Atomic32>(result);
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  LONG result = FastInterlockedExchange(
-      reinterpret_cast<volatile LONG*>(ptr),
-      static_cast<LONG>(new_value));
-  return static_cast<Atomic32>(result);
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-
-// In msvc8/vs2005, winnt.h already contains a definition for
-// MemoryBarrier in the global namespace.  Add it there for earlier
-// versions and forward to it from within the namespace.
-#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
-inline void MemoryBarrier() {
-  Atomic32 value = 0;
-  base::subtle::NoBarrier_AtomicExchange(&value, 0);
-                        // actually acts as a barrier in thisd implementation
-}
-#endif
-
-namespace base {
-namespace subtle {
-
-inline void MemoryBarrier() {
-  ::MemoryBarrier();
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  Acquire_AtomicExchange(ptr, value);
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
-  // See comments in Atomic64 version of Release_Store() below.
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-// 64-bit operations
-
-#if defined(_WIN64) || defined(__MINGW64__)
-
-// 64-bit low-level operations on 64-bit platform.
-
-COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);
-
-// These are the intrinsics needed for 64-bit operations.  Similar to the
-// 32-bit case above.
-
-extern "C" {
-#if defined(__MINGW64__)
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
-                                             newval, oldval);
-}
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
-}
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                             LONGLONG increment) {
-  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
-}
-
-#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
-// Like above, we need to declare the intrinsics ourselves.
-PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                         PVOID newval, PVOID oldval);
-#pragma intrinsic(_InterlockedCompareExchangePointer)
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
-                                            newval, oldval);
-}
-
-PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
-#pragma intrinsic(_InterlockedExchangePointer)
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
-}
-
-LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
-#pragma intrinsic(_InterlockedExchangeAdd64)
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                             LONGLONG increment) {
-  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
-}
-
-#else
-inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
-                                                   PVOID newval, PVOID oldval) {
-  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
-}
-inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
-  return ::InterlockedExchangePointer(ptr, newval);
-}
-inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
-                                         LONGLONG increment) {
-  return ::InterlockedExchangeAdd64(ptr, increment);
-}
-
-#endif  // ifdef __MINGW64__
-}  // extern "C"
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  PVOID result = FastInterlockedCompareExchangePointer(
-    reinterpret_cast<volatile PVOID*>(ptr),
-    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
-  return reinterpret_cast<Atomic64>(result);
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  PVOID result = FastInterlockedExchangePointer(
-    reinterpret_cast<volatile PVOID*>(ptr),
-    reinterpret_cast<PVOID>(new_value));
-  return reinterpret_cast<Atomic64>(result);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_AtomicExchange(ptr, value);
-              // acts as a barrier in this implementation
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
-
-  // When new chips come out, check:
-  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
-  //  System Programming Guide, Chatper 7: Multiple-processor management,
-  //  Section 7.2, Memory Ordering.
-  // Last seen at:
-  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = *ptr;
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#else  // defined(_WIN64) || defined(__MINGW64__)
-
-// 64-bit low-level operations on 32-bit platform
-
-// TODO(vchen): The GNU assembly below must be converted to MSVC inline
-// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.
-
-inline void NotImplementedFatalError(const char *function_name) {
-  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
-          function_name);
-  abort();
-}
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-#if 0 // Not implemented
-  Atomic64 prev;
-  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
-                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
-                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
-                       : "=A" (prev)             // as contents of ptr:
-                       : "m" (*ptr),             //   ecx:ebx => ptr
-                         "0" (old_value),        // else:
-                         "r" (&new_value)        //   old *ptr => edx:eax
-                       : "memory", "%ebx", "%ecx");
-  return prev;
-#else
-  NotImplementedFatalError("NoBarrier_CompareAndSwap");
-  return 0;
-#endif
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-#if 0 // Not implemented
-  __asm__ __volatile__(
-                       "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
-                       "movl 4(%2), %%ecx\n\t"   // ecx:ebx
-                       "0:\n\t"
-                       "movl %1, %%eax\n\t"      // Read contents of ptr into
-                       "movl 4%1, %%edx\n\t"     // edx:eax
-                       "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
-                       "jnz 0b\n\t"              // is no longer edx:eax, loop
-                       : "=A" (new_value)
-                       : "m" (*ptr),
-                         "r" (&new_value)
-                       : "memory", "%ebx", "%ecx");
-  return new_value;  // Now it's the previous value.
-#else
-  NotImplementedFatalError("NoBarrier_AtomicExchange");
-  return 0;
-#endif
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value)
-{
- 	__asm {
-    	movq mm0, value;  // Use mmx reg for 64-bit atomic moves
-    	mov eax, ptrValue;
-    	movq [eax], mm0;
-    	emms;            // Empty mmx state to enable FP registers
-  	}
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_AtomicExchange(ptr, value);
-              // acts as a barrier in this implementation
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_Store(ptr, value);
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue)
-{
-  	Atomic64 value;
-  	__asm {
-    	mov eax, ptrValue;
-    	movq mm0, [eax]; // Use mmx reg for 64-bit atomic moves
-    	movq value, mm0;
-    	emms; // Empty mmx state to enable FP registers
-  }
-  return value;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = NoBarrier_Load(ptr);
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return NoBarrier_Load(ptr);
-}
-
-#endif  // defined(_WIN64) || defined(__MINGW64__)
-
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // FastInterlockedExchange has both acquire and release memory barriers.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc b/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc
deleted file mode 100644
index c3391e78234..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-x86.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2007, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This module gets enough CPU information to optimize the
- * atomicops module on x86.
- */
-
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include <string.h>
-
-// This file only makes sense with atomicops-internals-x86.h -- it
-// depends on structs that are defined in that file.  If atomicops.h
-// doesn't sub-include that file, then we aren't needed, and shouldn't
-// try to do anything.
-#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_
-
-// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
-// of the global offset table.  To avoid breaking such executables, this code
-// must preserve that register's value across cpuid instructions.
-#if defined(__i386__)
-#define cpuid(a, b, c, d, inp) \
-  asm ("mov %%ebx, %%edi\n"    \
-       "cpuid\n"               \
-       "xchg %%edi, %%ebx\n"   \
-       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
-#elif defined (__x86_64__)
-#define cpuid(a, b, c, d, inp) \
-  asm ("mov %%rbx, %%rdi\n"    \
-       "cpuid\n"               \
-       "xchg %%rdi, %%rbx\n"   \
-       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
-#endif
-
-#if defined(cpuid)        // initialize the struct only on x86
-
-// Set the flags so that code will run correctly and conservatively
-// until InitGoogle() is called.
-struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
-  false,          // no SSE2
-  false           // no cmpxchg16b
-};
-
-// Initialize the AtomicOps_Internalx86CPUFeatures struct.
-static void AtomicOps_Internalx86CPUFeaturesInit() {
-  uint32 eax;
-  uint32 ebx;
-  uint32 ecx;
-  uint32 edx;
-
-  // Get vendor string (issue CPUID with eax = 0)
-  cpuid(eax, ebx, ecx, edx, 0);
-  char vendor[13];
-  memcpy(vendor, &ebx, 4);
-  memcpy(vendor + 4, &edx, 4);
-  memcpy(vendor + 8, &ecx, 4);
-  vendor[12] = 0;
-
-  // get feature flags in ecx/edx, and family/model in eax
-  cpuid(eax, ebx, ecx, edx, 1);
-
-  int family = (eax >> 8) & 0xf;        // family and model fields
-  int model = (eax >> 4) & 0xf;
-  if (family == 0xf) {                  // use extended family and model fields
-    family += (eax >> 20) & 0xff;
-    model += ((eax >> 16) & 0xf) << 4;
-  }
-
-  // edx bit 26 is SSE2 which we use to tell use whether we can use mfence
-  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
-
-  // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
-  AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
-}
-
-REGISTER_MODULE_INITIALIZER(atomicops_x86, {
-  AtomicOps_Internalx86CPUFeaturesInit();
-});
-
-#endif
-
-#endif  /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */
diff --git a/contrib/libtcmalloc/src/base/atomicops-internals-x86.h b/contrib/libtcmalloc/src/base/atomicops-internals-x86.h
deleted file mode 100644
index e441ac7e673..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops-internals-x86.h
+++ /dev/null
@@ -1,391 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// Implementation of atomic operations for x86.  This file should not
-// be included directly.  Clients should instead include
-// "base/atomicops.h".
-
-#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
-#define BASE_ATOMICOPS_INTERNALS_X86_H_
-#include "base/basictypes.h"
-
-typedef int32_t Atomic32;
-#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
-
-
-// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
-// already matches Atomic32 or Atomic64, depending on the platform.
-
-
-// This struct is not part of the public API of this module; clients may not
-// use it.
-// Features of this x86.  Values may not be correct before main() is run,
-// but are set conservatively.
-struct AtomicOps_x86CPUFeatureStruct {
-  bool has_sse2;            // Processor has SSE2.
-  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
-};
-
-ATTRIBUTE_VISIBILITY_HIDDEN
-extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
-
-
-#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
-
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 32-bit low-level operations on any platform.
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev;
-  __asm__ __volatile__("lock; cmpxchgl %1,%2"
-                       : "=a" (prev)
-                       : "q" (new_value), "m" (*ptr), "0" (old_value)
-                       : "memory");
-  return prev;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
-                       : "=r" (new_value)
-                       : "m" (*ptr), "0" (new_value)
-                       : "memory");
-  return new_value;  // Now it's the previous value.
-}
-
-inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
-  return old_val;
-}
-
-inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
-                                       Atomic32 new_value) {
-  // xchgl already has release memory barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  return x;
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-#if defined(__x86_64__)
-
-// 64-bit implementations of memory barrier can be simpler, because it
-// "mfence" is guaranteed to exist.
-inline void MemoryBarrier() {
-  __asm__ __volatile__("mfence" : : : "memory");
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-#else
-
-inline void MemoryBarrier() {
-  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
-    __asm__ __volatile__("mfence" : : : "memory");
-  } else { // mfence is faster but not present on PIII
-    Atomic32 x = 0;
-    Acquire_AtomicExchange(&x, 0);
-  }
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
-    *ptr = value;
-    __asm__ __volatile__("mfence" : : : "memory");
-  } else {
-    Acquire_AtomicExchange(ptr, value);
-  }
-}
-#endif
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  ATOMICOPS_COMPILER_BARRIER();
-  *ptr = value; // An x86 store acts as a release barrier.
-  // See comments in Atomic64 version of Release_Store(), below.
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr; // An x86 load acts as a acquire barrier.
-  // See comments in Atomic64 version of Release_Store(), below.
-  ATOMICOPS_COMPILER_BARRIER();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#if defined(__x86_64__)
-
-// 64-bit low-level operations on 64-bit platform.
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  Atomic64 prev;
-  __asm__ __volatile__("lock; cmpxchgq %1,%2"
-                       : "=a" (prev)
-                       : "q" (new_value), "m" (*ptr), "0" (old_value)
-                       : "memory");
-  return prev;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
-                       : "=r" (new_value)
-                       : "m" (*ptr), "0" (new_value)
-                       : "memory");
-  return new_value;  // Now it's the previous value.
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
-  return old_val;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_value) {
-  // xchgq already has release memory barrier semantics.
-  return NoBarrier_AtomicExchange(ptr, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  ATOMICOPS_COMPILER_BARRIER();
-
-  *ptr = value; // An x86 store acts as a release barrier
-                // for current AMD/Intel chips as of Jan 2008.
-                // See also Acquire_Load(), below.
-
-  // When new chips come out, check:
-  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
-  //  System Programming Guide, Chatper 7: Multiple-processor management,
-  //  Section 7.2, Memory Ordering.
-  // Last seen at:
-  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
-  //
-  // x86 stores/loads fail to act as barriers for a few instructions (clflush
-  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
-  // not generated by the compiler, and are rare.  Users of these instructions
-  // need to know about cache behaviour in any case since all of these involve
-  // either flushing cache lines or non-temporal cache hints.
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  return *ptr;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = *ptr; // An x86 load acts as a acquire barrier,
-                         // for current AMD/Intel chips as of Jan 2008.
-                         // See also Release_Store(), above.
-  ATOMICOPS_COMPILER_BARRIER();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-#else // defined(__x86_64__)
-
-// 64-bit low-level operations on 32-bit platform.
-
-#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
-// For compilers older than gcc 4.1, we use inline asm.
-//
-// Potential pitfalls:
-//
-// 1. %ebx points to Global offset table (GOT) with -fPIC.
-//    We need to preserve this register.
-// 2. When explicit registers are used in inline asm, the
-//    compiler may not be aware of it and might try to reuse
-//    the same register for another argument which has constraints
-//    that allow it ("r" for example).
-
-inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
-                                            Atomic64 old_value,
-                                            Atomic64 new_value) {
-  Atomic64 prev;
-  __asm__ __volatile__("push %%ebx\n\t"
-                       "movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
-                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
-                       "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same
-                       "pop %%ebx\n\t"
-                       : "=A" (prev)             // as contents of ptr:
-                       : "D" (ptr),              //   ecx:ebx => ptr
-                         "0" (old_value),        // else:
-                         "S" (&new_value)        //   old *ptr => edx:eax
-                       : "memory", "%ecx");
-  return prev;
-}
-#endif  // Compiler < gcc-4.1
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_val,
-                                         Atomic64 new_val) {
-  return __sync_val_compare_and_swap(ptr, old_val, new_val);
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_val) {
-  Atomic64 old_val;
-
-  do {
-    old_val = *ptr;
-  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
-
-  return old_val;
-}
-
-inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_val) {
-  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
-  return old_val;
-}
-
-inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
-                                       Atomic64 new_val) {
- return NoBarrier_AtomicExchange(ptr, new_val);
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
-                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
-                       "emms\n\t"            // Empty mmx state/Reset FP regs
-                       : "=m" (*ptr)
-                       : "m" (value)
-                       : // mark the FP stack and mmx registers as clobbered
-			 "st", "st(1)", "st(2)", "st(3)", "st(4)",
-                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
-                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_Store(ptr, value);
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  ATOMICOPS_COMPILER_BARRIER();
-  NoBarrier_Store(ptr, value);
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  Atomic64 value;
-  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
-                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
-                       "emms\n\t"            // Empty mmx state/Reset FP regs
-                       : "=m" (value)
-                       : "m" (*ptr)
-                       : // mark the FP stack and mmx registers as clobbered
-                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
-                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
-                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
-  return value;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = NoBarrier_Load(ptr);
-  ATOMICOPS_COMPILER_BARRIER();
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return NoBarrier_Load(ptr);
-}
-
-#endif // defined(__x86_64__)
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-  return x;
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-} // namespace base::subtle
-} // namespace base
-
-#undef ATOMICOPS_COMPILER_BARRIER
-
-#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_
diff --git a/contrib/libtcmalloc/src/base/atomicops.h b/contrib/libtcmalloc/src/base/atomicops.h
deleted file mode 100644
index 46a4b9bb7a2..00000000000
--- a/contrib/libtcmalloc/src/base/atomicops.h
+++ /dev/null
@@ -1,399 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// For atomic operations on statistics counters, see atomic_stats_counter.h.
-// For atomic operations on sequence numbers, see atomic_sequence_num.h.
-// For atomic operations on reference counts, see atomic_refcount.h.
-
-// Some fast atomic operations -- typically with machine-dependent
-// implementations.  This file may need editing as Google code is
-// ported to different architectures.
-
-// The routines exported by this module are subtle.  If you use them, even if
-// you get the code right, it will depend on careful reasoning about atomicity
-// and memory ordering; it will be less readable, and harder to maintain.  If
-// you plan to use these routines, you should have a good reason, such as solid
-// evidence that performance would otherwise suffer, or there being no
-// alternative.  You should assume only properties explicitly guaranteed by the
-// specifications in this file.  You are almost certainly _not_ writing code
-// just for the x86; if you assume x86 semantics, x86 hardware bugs and
-// implementations on other archtectures will cause your code to break.  If you
-// do not know what you are doing, avoid these routines, and use a Mutex.
-//
-// These following lower-level operations are typically useful only to people
-// implementing higher-level synchronization operations like spinlocks,
-// mutexes, and condition-variables.  They combine CompareAndSwap(), a load, or
-// a store with appropriate memory-ordering instructions.  "Acquire" operations
-// ensure that no later memory access can be reordered ahead of the operation.
-// "Release" operations ensure that no previous memory access can be reordered
-// after the operation.  "Barrier" operations have both "Acquire" and "Release"
-// semantics.   A MemoryBarrier() has "Barrier" semantics, but does no memory
-// access.
-//
-// It is incorrect to make direct assignments to/from an atomic variable.
-// You should use one of the Load or Store routines.  The NoBarrier
-// versions are provided when no barriers are needed:
-//   NoBarrier_Store()
-//   NoBarrier_Load()
-// Although there are currently no compiler enforcement, you are encouraged
-// to use these.  Moreover, if you choose to use base::subtle::Atomic64 type,
-// you MUST use one of the Load or Store routines to get correct behavior
-// on 32-bit platforms.
-//
-// The intent is eventually to put all of these routines in namespace
-// base::subtle
-
-#ifndef THREAD_ATOMICOPS_H_
-#define THREAD_ATOMICOPS_H_
-
-#include "../config.h"
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-// ------------------------------------------------------------------------
-// Include the platform specific implementations of the types
-// and operations listed below.  Implementations are to provide Atomic32
-// and Atomic64 operations. If there is a mismatch between intptr_t and
-// the Atomic32 or Atomic64 types for a platform, the platform-specific header
-// should define the macro, AtomicWordCastType in a clause similar to the
-// following:
-// #if ...pointers are 64 bits...
-// # define AtomicWordCastType base::subtle::Atomic64
-// #else
-// # define AtomicWordCastType Atomic32
-// #endif
-// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?)
-// ------------------------------------------------------------------------
-
-#include "base/arm_instruction_set_select.h"
-#define GCC_VERSION (__GNUC__ * 10000                 \
-                     + __GNUC_MINOR__ * 100           \
-                     + __GNUC_PATCHLEVEL__)
-
-#define CLANG_VERSION (__clang_major__ * 10000         \
-                       + __clang_minor__ * 100         \
-                       + __clang_patchlevel__)
-
-#if defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__GNUC__) && GCC_VERSION >= 40700
-#include "base/atomicops-internals-gcc.h"
-#elif defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__clang__) && CLANG_VERSION >= 30400
-#include "base/atomicops-internals-gcc.h"
-#elif defined(__MACH__) && defined(__APPLE__)
-#include "base/atomicops-internals-macosx.h"
-#elif defined(__GNUC__) && defined(ARMV6)
-#include "base/atomicops-internals-arm-v6plus.h"
-#elif defined(ARMV3)
-#include "base/atomicops-internals-arm-generic.h"
-#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
-#include "base/atomicops-internals-x86.h"
-#elif defined(_WIN32)
-#include "base/atomicops-internals-windows.h"
-#elif defined(__linux__) && defined(__PPC__)
-#include "base/atomicops-internals-linuxppc.h"
-#elif defined(__GNUC__) && defined(__mips__)
-#include "base/atomicops-internals-mips.h"
-#elif defined(__GNUC__) && GCC_VERSION >= 40700
-#include "base/atomicops-internals-gcc.h"
-#elif defined(__clang__) && CLANG_VERSION >= 30400
-#include "base/atomicops-internals-gcc.h"
-#else
-#error You need to implement atomic operations for this architecture
-#endif
-
-// Signed type that can hold a pointer and supports the atomic ops below, as
-// well as atomic loads and stores.  Instances must be naturally-aligned.
-typedef intptr_t AtomicWord;
-
-#ifdef AtomicWordCastType
-// ------------------------------------------------------------------------
-// This section is needed only when explicit type casting is required to
-// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32).
-// It also serves to document the AtomicWord interface.
-// ------------------------------------------------------------------------
-
-namespace base {
-namespace subtle {
-
-// Atomically execute:
-//      result = *ptr;
-//      if (*ptr == old_value)
-//        *ptr = new_value;
-//      return result;
-//
-// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value".
-// Always return the old value of "*ptr"
-//
-// This routine implies no memory barriers.
-inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr,
-                                           AtomicWord old_value,
-                                           AtomicWord new_value) {
-  return NoBarrier_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-// Atomically store new_value into *ptr, returning the previous value held in
-// *ptr.  This routine implies no memory barriers.
-inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr,
-                                           AtomicWord new_value) {
-  return NoBarrier_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
-                                         AtomicWord new_value) {
-  return Acquire_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
-                                         AtomicWord new_value) {
-  return Release_AtomicExchange(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
-}
-
-inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Acquire_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Release_CompareAndSwap(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
-      old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) {
-  NoBarrier_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Acquire_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Release_Store(
-      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
-}
-
-inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) {
-  return NoBarrier_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Acquire_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Release_Load(
-      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
-}
-
-}  // namespace base::subtle
-}  // namespace base
-#endif  // AtomicWordCastType
-
-// ------------------------------------------------------------------------
-// Commented out type definitions and method declarations for documentation
-// of the interface provided by this module.
-// ------------------------------------------------------------------------
-
-#if 0
-
-// Signed 32-bit type that supports the atomic ops below, as well as atomic
-// loads and stores.  Instances must be naturally aligned.  This type differs
-// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits.
-typedef int32_t Atomic32;
-
-// Corresponding operations on Atomic32
-namespace base {
-namespace subtle {
-
-// Signed 64-bit type that supports the atomic ops below, as well as atomic
-// loads and stores.  Instances must be naturally aligned.  This type differs
-// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits.
-typedef int64_t Atomic64;
-
-Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                  Atomic32 old_value,
-                                  Atomic32 new_value);
-Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
-Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
-Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
-Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                Atomic32 old_value,
-                                Atomic32 new_value);
-Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                Atomic32 old_value,
-                                Atomic32 new_value);
-void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value);
-void Acquire_Store(volatile Atomic32* ptr, Atomic32 value);
-void Release_Store(volatile Atomic32* ptr, Atomic32 value);
-Atomic32 NoBarrier_Load(volatile const Atomic32* ptr);
-Atomic32 Acquire_Load(volatile const Atomic32* ptr);
-Atomic32 Release_Load(volatile const Atomic32* ptr);
-
-// Corresponding operations on Atomic64
-Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                  Atomic64 old_value,
-                                  Atomic64 new_value);
-Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
-Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
-Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
-
-Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                Atomic64 old_value,
-                                Atomic64 new_value);
-Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                Atomic64 old_value,
-                                Atomic64 new_value);
-void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value);
-void Acquire_Store(volatile Atomic64* ptr, Atomic64 value);
-void Release_Store(volatile Atomic64* ptr, Atomic64 value);
-Atomic64 NoBarrier_Load(volatile const Atomic64* ptr);
-Atomic64 Acquire_Load(volatile const Atomic64* ptr);
-Atomic64 Release_Load(volatile const Atomic64* ptr);
-}  // namespace base::subtle
-}  // namespace base
-
-void MemoryBarrier();
-
-#endif  // 0
-
-
-// ------------------------------------------------------------------------
-// The following are to be deprecated when all uses have been changed to
-// use the base::subtle namespace.
-// ------------------------------------------------------------------------
-
-#ifdef AtomicWordCastType
-// AtomicWord versions to be deprecated
-inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Acquire_Store(ptr, value);
-}
-
-inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  return base::subtle::Release_Store(ptr, value);
-}
-
-inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Acquire_Load(ptr);
-}
-
-inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
-  return base::subtle::Release_Load(ptr);
-}
-#endif  // AtomicWordCastType
-
-// 32-bit Acquire/Release operations to be deprecated.
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
-}
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  base::subtle::Acquire_Store(ptr, value);
-}
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  return base::subtle::Release_Store(ptr, value);
-}
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  return base::subtle::Acquire_Load(ptr);
-}
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  return base::subtle::Release_Load(ptr);
-}
-
-#ifdef BASE_HAS_ATOMIC64
-
-// 64-bit Acquire/Release operations to be deprecated.
-
-inline base::subtle::Atomic64 Acquire_CompareAndSwap(
-    volatile base::subtle::Atomic64* ptr,
-    base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) {
-  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
-}
-inline base::subtle::Atomic64 Release_CompareAndSwap(
-    volatile base::subtle::Atomic64* ptr,
-    base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) {
-  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
-}
-inline void Acquire_Store(
-    volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) {
-  base::subtle::Acquire_Store(ptr, value);
-}
-inline void Release_Store(
-    volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) {
-  return base::subtle::Release_Store(ptr, value);
-}
-inline base::subtle::Atomic64 Acquire_Load(
-    volatile const base::subtle::Atomic64* ptr) {
-  return base::subtle::Acquire_Load(ptr);
-}
-inline base::subtle::Atomic64 Release_Load(
-    volatile const base::subtle::Atomic64* ptr) {
-  return base::subtle::Release_Load(ptr);
-}
-
-#endif  // BASE_HAS_ATOMIC64
-
-#endif  // THREAD_ATOMICOPS_H_
diff --git a/contrib/libtcmalloc/src/base/basictypes.h b/contrib/libtcmalloc/src/base/basictypes.h
deleted file mode 100644
index a81d0466c27..00000000000
--- a/contrib/libtcmalloc/src/base/basictypes.h
+++ /dev/null
@@ -1,408 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef _BASICTYPES_H_
-#define _BASICTYPES_H_
-
-#include "../config.h"
-#include <string.h>       // for memcpy()
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>     // gets us PRId64, etc
-#endif
-
-// To use this in an autoconf setting, make sure you run the following
-// autoconf macros:
-//    AC_HEADER_STDC              /* for stdint_h and inttypes_h */
-//    AC_CHECK_TYPES([__int64])   /* defined in some windows platforms */
-
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>           // uint16_t might be here; PRId64 too.
-#endif
-#ifdef HAVE_STDINT_H
-#include <stdint.h>             // to get uint16_t (ISO naming madness)
-#endif
-#include <sys/types.h>          // our last best hope for uint16_t
-
-// Standard typedefs
-// All Google code is compiled with -funsigned-char to make "char"
-// unsigned.  Google code therefore doesn't need a "uchar" type.
-// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems?
-typedef signed char         schar;
-typedef int8_t              int8;
-typedef int16_t             int16;
-typedef int32_t             int32;
-typedef int64_t             int64;
-
-// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
-// places.  Use the signed types unless your variable represents a bit
-// pattern (eg a hash value) or you really need the extra bit.  Do NOT
-// use 'unsigned' to express "this value should always be positive";
-// use assertions for this.
-
-typedef uint8_t            uint8;
-typedef uint16_t           uint16;
-typedef uint32_t           uint32;
-typedef uint64_t           uint64;
-
-const uint16 kuint16max = (   (uint16) 0xFFFF);
-const uint32 kuint32max = (   (uint32) 0xFFFFFFFF);
-const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max );
-
-const  int8  kint8max   = (   (  int8) 0x7F);
-const  int16 kint16max  = (   ( int16) 0x7FFF);
-const  int32 kint32max  = (   ( int32) 0x7FFFFFFF);
-const  int64 kint64max =  ( ((( int64) kint32max) << 32) | kuint32max );
-
-const  int8  kint8min   = (   (  int8) 0x80);
-const  int16 kint16min  = (   ( int16) 0x8000);
-const  int32 kint32min  = (   ( int32) 0x80000000);
-const  int64 kint64min =  ( (((uint64) kint32min) << 32) | 0 );
-
-// Define the "portable" printf and scanf macros, if they're not
-// already there (via the inttypes.h we #included above, hopefully).
-// Mostly it's old systems that don't support inttypes.h, so we assume
-// they're 32 bit.
-#ifndef PRIx64
-#define PRIx64 "llx"
-#endif
-#ifndef SCNx64
-#define SCNx64 "llx"
-#endif
-#ifndef PRId64
-#define PRId64 "lld"
-#endif
-#ifndef SCNd64
-#define SCNd64 "lld"
-#endif
-#ifndef PRIu64
-#define PRIu64 "llu"
-#endif
-#ifndef PRIxPTR
-#define PRIxPTR "lx"
-#endif
-
-// Also allow for printing of a pthread_t.
-#define GPRIuPTHREAD "lu"
-#define GPRIxPTHREAD "lx"
-#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__)
-#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt)
-#else
-#define PRINTABLE_PTHREAD(pthreadt) pthreadt
-#endif
-
-// A macro to disallow the evil copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define DISALLOW_EVIL_CONSTRUCTORS(TypeName)    \
-  TypeName(const TypeName&);                    \
-  void operator=(const TypeName&)
-
-// An alternate name that leaves out the moral judgment... :-)
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName)
-
-// The COMPILE_ASSERT macro can be used to verify that a compile time
-// expression is true. For example, you could use it to verify the
-// size of a static array:
-//
-//   COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int),
-//                  content_type_names_incorrect_size);
-//
-// or to make sure a struct is smaller than a certain size:
-//
-//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
-//
-// The second argument to the macro is the name of the variable. If
-// the expression is false, most compilers will issue a warning/error
-// containing the name of the variable.
-//
-// Implementation details of COMPILE_ASSERT:
-//
-// - COMPILE_ASSERT works by defining an array type that has -1
-//   elements (and thus is invalid) when the expression is false.
-//
-// - The simpler definition
-//
-//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
-//
-//   does not work, as gcc supports variable-length arrays whose sizes
-//   are determined at run-time (this is gcc's extension and not part
-//   of the C++ standard).  As a result, gcc fails to reject the
-//   following code with the simple definition:
-//
-//     int foo;
-//     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
-//                               // not a compile-time constant.
-//
-// - By using the type CompileAssert<(bool(expr))>, we ensures that
-//   expr is a compile-time constant.  (Template arguments must be
-//   determined at compile-time.)
-//
-// - The outter parentheses in CompileAssert<(bool(expr))> are necessary
-//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
-//
-//     CompileAssert<bool(expr)>
-//
-//   instead, these compilers will refuse to compile
-//
-//     COMPILE_ASSERT(5 > 0, some_message);
-//
-//   (They seem to think the ">" in "5 > 0" marks the end of the
-//   template argument list.)
-//
-// - The array size is (bool(expr) ? 1 : -1), instead of simply
-//
-//     ((expr) ? 1 : -1).
-//
-//   This is to avoid running into a bug in MS VC 7.1, which
-//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
-
-template <bool>
-struct CompileAssert {
-};
-
-#ifdef HAVE___ATTRIBUTE__
-# define ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-# define ATTRIBUTE_UNUSED
-#endif
-
-#if defined(HAVE___ATTRIBUTE__) && defined(HAVE_TLS)
-#define ATTR_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec")))
-#else
-#define ATTR_INITIAL_EXEC
-#endif
-
-#define COMPILE_ASSERT(expr, msg)                               \
-  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED
-
-#define arraysize(a)  (sizeof(a) / sizeof(*(a)))
-
-#define OFFSETOF_MEMBER(strct, field)                                   \
-   (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) -     \
-    reinterpret_cast<char*>(16))
-
-// bit_cast<Dest,Source> implements the equivalent of
-// "*reinterpret_cast<Dest*>(&source)".
-//
-// The reinterpret_cast method would produce undefined behavior
-// according to ISO C++ specification section 3.10 -15 -.
-// bit_cast<> calls memcpy() which is blessed by the standard,
-// especially by the example in section 3.9.
-//
-// Fortunately memcpy() is very fast.  In optimized mode, with a
-// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
-// code with the minimal amount of data movement.  On a 32-bit system,
-// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
-// compiles to two loads and two stores.
-
-template <class Dest, class Source>
-inline Dest bit_cast(const Source& source) {
-  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
-  Dest dest;
-  memcpy(&dest, &source, sizeof(dest));
-  return dest;
-}
-
-// bit_store<Dest,Source> implements the equivalent of
-// "dest = *reinterpret_cast<Dest*>(&source)".
-//
-// This prevents undefined behavior when the dest pointer is unaligned.
-template <class Dest, class Source>
-inline void bit_store(Dest *dest, const Source *source) {
-  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
-  memcpy(dest, source, sizeof(Dest));
-}
-
-#ifdef HAVE___ATTRIBUTE__
-# define ATTRIBUTE_WEAK      __attribute__((weak))
-# define ATTRIBUTE_NOINLINE  __attribute__((noinline))
-#else
-# define ATTRIBUTE_WEAK
-# define ATTRIBUTE_NOINLINE
-#endif
-
-#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
-# define ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
-#else
-# define ATTRIBUTE_VISIBILITY_HIDDEN
-#endif
-
-// Section attributes are supported for both ELF and Mach-O, but in
-// very different ways.  Here's the API we provide:
-// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions
-//    you want to be in the same linker section
-// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique
-//    name.  You want to make sure this is executed before any
-//    DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them
-//    in the same .cc file.  Put this call at the global level.
-// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in
-//    multiple places to help ensure execution before any
-//    DECLARE_ATTRIBUTE_SECTION_VARS.  You must have at least one
-//    DEFINE, but you can have many INITs.  Put each in its own scope.
-// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using
-//    ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name.
-//    Put this call at the global level.
-// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say
-//    where in memory a given section is.  All functions declared with
-//    ATTRIBUTE_SECTION are guaranteed to be between START and STOP.
-
-#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
-# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name)))
-
-  // Weak section declaration to be used as a global declaration
-  // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link
-  // even without functions with ATTRIBUTE_SECTION(name).
-# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
-    extern char __start_##name[] ATTRIBUTE_WEAK; \
-    extern char __stop_##name[] ATTRIBUTE_WEAK
-# define INIT_ATTRIBUTE_SECTION_VARS(name)     // no-op for ELF
-# define DEFINE_ATTRIBUTE_SECTION_VARS(name)   // no-op for ELF
-
-  // Return void* pointers to start/end of a section of code with functions
-  // having ATTRIBUTE_SECTION(name), or 0 if no such function exists.
-  // One must DECLARE_ATTRIBUTE_SECTION(name) for this to compile and link.
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
-# define HAVE_ATTRIBUTE_SECTION_START 1
-
-#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__)
-# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name)))
-
-#include <mach-o/getsect.h>
-#include <mach-o/dyld.h>
-class AssignAttributeStartEnd {
- public:
-  AssignAttributeStartEnd(const char* name, char** pstart, char** pend) {
-    // Find out what dynamic library name is defined in
-    if (_dyld_present()) {
-      for (int i = _dyld_image_count() - 1; i >= 0; --i) {
-        const mach_header* hdr = _dyld_get_image_header(i);
-#ifdef MH_MAGIC_64
-        if (hdr->magic == MH_MAGIC_64) {
-          uint64_t len;
-          *pstart = getsectdatafromheader_64((mach_header_64*)hdr,
-                                             "__TEXT", name, &len);
-          if (*pstart) {   // NULL if not defined in this dynamic library
-            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
-            *pend = *pstart + len;
-            return;
-          }
-        }
-#endif
-        if (hdr->magic == MH_MAGIC) {
-          uint32_t len;
-          *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len);
-          if (*pstart) {   // NULL if not defined in this dynamic library
-            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
-            *pend = *pstart + len;
-            return;
-          }
-        }
-      }
-    }
-    // If we get here, not defined in a dll at all.  See if defined statically.
-    unsigned long len;    // don't ask me why this type isn't uint32_t too...
-    *pstart = getsectdata("__TEXT", name, &len);
-    *pend = *pstart + len;
-  }
-};
-
-#define DECLARE_ATTRIBUTE_SECTION_VARS(name)    \
-  extern char* __start_##name;                  \
-  extern char* __stop_##name
-
-#define INIT_ATTRIBUTE_SECTION_VARS(name)               \
-  DECLARE_ATTRIBUTE_SECTION_VARS(name);                 \
-  static const AssignAttributeStartEnd __assign_##name( \
-    #name, &__start_##name, &__stop_##name)
-
-#define DEFINE_ATTRIBUTE_SECTION_VARS(name)     \
-  char* __start_##name, *__stop_##name;         \
-  INIT_ATTRIBUTE_SECTION_VARS(name)
-
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
-# define HAVE_ATTRIBUTE_SECTION_START 1
-
-#else  // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__
-# define ATTRIBUTE_SECTION(name)
-# define DECLARE_ATTRIBUTE_SECTION_VARS(name)
-# define INIT_ATTRIBUTE_SECTION_VARS(name)
-# define DEFINE_ATTRIBUTE_SECTION_VARS(name)
-# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0))
-# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0))
-
-#endif  // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
-
-#if defined(HAVE___ATTRIBUTE__)
-# if (defined(__i386__) || defined(__x86_64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-# elif (defined(__PPC__) || defined(__PPC64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(16)))
-# elif (defined(__arm__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-    // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
-# elif (defined(__mips__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(128)))
-# elif (defined(__aarch64__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
-    // implementation specific, Cortex-A53 and 57 should have 64 bytes
-# elif (defined(__s390__))
-#   define CACHELINE_ALIGNED __attribute__((aligned(256)))
-# else
-#   error Could not determine cache line length - unknown architecture
-# endif
-#else
-# define CACHELINE_ALIGNED
-#endif  // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
-
-// Structure for discovering alignment
-union MemoryAligner {
-  void*  p;
-  double d;
-  size_t s;
-} CACHELINE_ALIGNED;
-
-// The following enum should be used only as a constructor argument to indicate
-// that the variable has static storage class, and that the constructor should
-// do nothing to its state.  It indicates to the reader that it is legal to
-// declare a static nistance of the class, provided the constructor is given
-// the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
-// static variable that has a constructor or a destructor because invocation
-// order is undefined.  However, IF the type can be initialized by filling with
-// zeroes (which the loader does for static variables), AND the destructor also
-// does nothing to the storage, then a constructor declared as
-//       explicit MyClass(base::LinkerInitialized x) {}
-// and invoked as
-//       static MyClass my_variable_name(base::LINKER_INITIALIZED);
-namespace base {
-enum LinkerInitialized { LINKER_INITIALIZED };
-}
-
-#endif  // _BASICTYPES_H_
diff --git a/contrib/libtcmalloc/src/base/commandlineflags.h b/contrib/libtcmalloc/src/base/commandlineflags.h
deleted file mode 100644
index e940edd3791..00000000000
--- a/contrib/libtcmalloc/src/base/commandlineflags.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file is a compatibility layer that defines Google's version of
-// command line flags that are used for configuration.
-//
-// We put flags into their own namespace.  It is purposefully
-// named in an opaque way that people should have trouble typing
-// directly.  The idea is that DEFINE puts the flag in the weird
-// namespace, and DECLARE imports the flag from there into the
-// current namespace.  The net result is to force people to use
-// DECLARE to get access to a flag, rather than saying
-//   extern bool FLAGS_logtostderr;
-// or some such instead.  We want this so we can put extra
-// functionality (like sanity-checking) in DECLARE if we want,
-// and make sure it is picked up everywhere.
-//
-// We also put the type of the variable in the namespace, so that
-// people can't DECLARE_int32 something that they DEFINE_bool'd
-// elsewhere.
-#ifndef BASE_COMMANDLINEFLAGS_H_
-#define BASE_COMMANDLINEFLAGS_H_
-
-#include "../config.h"
-#include <string>
-#include <string.h>               // for memchr
-#include <stdlib.h>               // for getenv
-#include "base/basictypes.h"
-
-#define DECLARE_VARIABLE(type, name)                                          \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
-  extern PERFTOOLS_DLL_DECL type FLAGS_##name;                                \
-  }                                                                           \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
-
-#define DEFINE_VARIABLE(type, name, value, meaning) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
-  PERFTOOLS_DLL_DECL type FLAGS_##name(value);                                \
-  char FLAGS_no##name;                                                        \
-  }                                                                           \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
-
-// bool specialization
-#define DECLARE_bool(name) \
-  DECLARE_VARIABLE(bool, name)
-#define DEFINE_bool(name, value, meaning) \
-  DEFINE_VARIABLE(bool, name, value, meaning)
-
-// int32 specialization
-#define DECLARE_int32(name) \
-  DECLARE_VARIABLE(int32, name)
-#define DEFINE_int32(name, value, meaning) \
-  DEFINE_VARIABLE(int32, name, value, meaning)
-
-// int64 specialization
-#define DECLARE_int64(name) \
-  DECLARE_VARIABLE(int64, name)
-#define DEFINE_int64(name, value, meaning) \
-  DEFINE_VARIABLE(int64, name, value, meaning)
-
-#define DECLARE_uint64(name) \
-  DECLARE_VARIABLE(uint64, name)
-#define DEFINE_uint64(name, value, meaning) \
-  DEFINE_VARIABLE(uint64, name, value, meaning)
-
-// double specialization
-#define DECLARE_double(name) \
-  DECLARE_VARIABLE(double, name)
-#define DEFINE_double(name, value, meaning) \
-  DEFINE_VARIABLE(double, name, value, meaning)
-
-// Special case for string, because we have to specify the namespace
-// std::string, which doesn't play nicely with our FLAG__namespace hackery.
-#define DECLARE_string(name)                                          \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead {  \
-  extern std::string FLAGS_##name;                                                   \
-  }                                                                           \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
-#define DEFINE_string(name, value, meaning) \
-  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead {  \
-  std::string FLAGS_##name(value);                                                   \
-  char FLAGS_no##name;                                                        \
-  }                                                                           \
-  using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name
-
-// implemented in sysinfo.cc
-namespace tcmalloc {
-  namespace commandlineflags {
-
-    inline bool StringToBool(const char *value, bool def) {
-      if (!value) {
-        return def;
-      }
-      return memchr("tTyY1\0", value[0], 6) != NULL;
-    }
-
-    inline int StringToInt(const char *value, int def) {
-      if (!value) {
-        return def;
-      }
-      return strtol(value, NULL, 10);
-    }
-
-    inline long long StringToLongLong(const char *value, long long def) {
-      if (!value) {
-        return def;
-      }
-      return strtoll(value, NULL, 10);
-    }
-
-    inline double StringToDouble(const char *value, double def) {
-      if (!value) {
-        return def;
-      }
-      return strtod(value, NULL);
-    }
-  }
-}
-
-// These macros (could be functions, but I don't want to bother with a .cc
-// file), make it easier to initialize flags from the environment.
-
-#define EnvToString(envname, dflt)   \
-  (!getenv(envname) ? (dflt) : getenv(envname))
-
-#define EnvToBool(envname, dflt)   \
-  tcmalloc::commandlineflags::StringToBool(getenv(envname), dflt)
-
-#define EnvToInt(envname, dflt)  \
-  tcmalloc::commandlineflags::StringToInt(getenv(envname), dflt)
-
-#define EnvToInt64(envname, dflt)  \
-  tcmalloc::commandlineflags::StringToLongLong(getenv(envname), dflt)
-
-#define EnvToDouble(envname, dflt)  \
-  tcmalloc::commandlineflags::StringToDouble(getenv(envname), dflt)
-
-#endif  // BASE_COMMANDLINEFLAGS_H_
diff --git a/contrib/libtcmalloc/src/base/dynamic_annotations.c b/contrib/libtcmalloc/src/base/dynamic_annotations.c
deleted file mode 100644
index 87bd2ecde97..00000000000
--- a/contrib/libtcmalloc/src/base/dynamic_annotations.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/* Copyright (c) 2008-2009, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Kostya Serebryany
- */
-
-#ifdef __cplusplus
-# error "This file should be built as pure C to avoid name mangling"
-#endif
-
-#include "config.h"
-#include <stdlib.h>
-#include <string.h>
-
-#include "base/dynamic_annotations.h"
-#include "getenv_safe.h" // for TCMallocGetenvSafe
-
-#ifdef __GNUC__
-/* valgrind.h uses gcc extensions so it won't build with other compilers */
-# ifdef HAVE_VALGRIND_H    /* prefer the user's copy if they have it */
-#  include <valgrind.h>
-# else                     /* otherwise just use the copy that we have */
-#  include "third_party/valgrind.h"
-# endif
-#endif
-
-/* Compiler-based ThreadSanitizer defines
-   DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1
-   and provides its own definitions of the functions. */
-
-#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL
-# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0
-#endif
-
-/* Each function is empty and called (via a macro) only in debug mode.
-   The arguments are captured by dynamic tools at runtime. */
-
-#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \
-    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
-
-void AnnotateRWLockCreate(const char *file, int line,
-                          const volatile void *lock){}
-void AnnotateRWLockDestroy(const char *file, int line,
-                           const volatile void *lock){}
-void AnnotateRWLockAcquired(const char *file, int line,
-                            const volatile void *lock, long is_w){}
-void AnnotateRWLockReleased(const char *file, int line,
-                            const volatile void *lock, long is_w){}
-void AnnotateBarrierInit(const char *file, int line,
-                         const volatile void *barrier, long count,
-                         long reinitialization_allowed) {}
-void AnnotateBarrierWaitBefore(const char *file, int line,
-                               const volatile void *barrier) {}
-void AnnotateBarrierWaitAfter(const char *file, int line,
-                              const volatile void *barrier) {}
-void AnnotateBarrierDestroy(const char *file, int line,
-                            const volatile void *barrier) {}
-
-void AnnotateCondVarWait(const char *file, int line,
-                         const volatile void *cv,
-                         const volatile void *lock){}
-void AnnotateCondVarSignal(const char *file, int line,
-                           const volatile void *cv){}
-void AnnotateCondVarSignalAll(const char *file, int line,
-                              const volatile void *cv){}
-void AnnotatePublishMemoryRange(const char *file, int line,
-                                const volatile void *address,
-                                long size){}
-void AnnotateUnpublishMemoryRange(const char *file, int line,
-                                  const volatile void *address,
-                                  long size){}
-void AnnotatePCQCreate(const char *file, int line,
-                       const volatile void *pcq){}
-void AnnotatePCQDestroy(const char *file, int line,
-                        const volatile void *pcq){}
-void AnnotatePCQPut(const char *file, int line,
-                    const volatile void *pcq){}
-void AnnotatePCQGet(const char *file, int line,
-                    const volatile void *pcq){}
-void AnnotateNewMemory(const char *file, int line,
-                       const volatile void *mem,
-                       long size){}
-void AnnotateExpectRace(const char *file, int line,
-                        const volatile void *mem,
-                        const char *description){}
-void AnnotateBenignRace(const char *file, int line,
-                        const volatile void *mem,
-                        const char *description){}
-void AnnotateBenignRaceSized(const char *file, int line,
-                             const volatile void *mem,
-                             long size,
-                             const char *description) {}
-void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
-                                  const volatile void *mu){}
-void AnnotateTraceMemory(const char *file, int line,
-                         const volatile void *arg){}
-void AnnotateThreadName(const char *file, int line,
-                        const char *name){}
-void AnnotateIgnoreReadsBegin(const char *file, int line){}
-void AnnotateIgnoreReadsEnd(const char *file, int line){}
-void AnnotateIgnoreWritesBegin(const char *file, int line){}
-void AnnotateIgnoreWritesEnd(const char *file, int line){}
-void AnnotateEnableRaceDetection(const char *file, int line, int enable){}
-void AnnotateNoOp(const char *file, int line,
-                  const volatile void *arg){}
-void AnnotateFlushState(const char *file, int line){}
-
-#endif  /* DYNAMIC_ANNOTATIONS_ENABLED == 1
-    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
-
-#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
-
-static int GetRunningOnValgrind(void) {
-#ifdef RUNNING_ON_VALGRIND
-  if (RUNNING_ON_VALGRIND) return 1;
-#endif
-  const char *running_on_valgrind_str = TCMallocGetenvSafe("RUNNING_ON_VALGRIND");
-  if (running_on_valgrind_str) {
-    return strcmp(running_on_valgrind_str, "0") != 0;
-  }
-  return 0;
-}
-
-/* See the comments in dynamic_annotations.h */
-int RunningOnValgrind(void) {
-  static volatile int running_on_valgrind = -1;
-  int local_running_on_valgrind = running_on_valgrind;
-  /* C doesn't have thread-safe initialization of statics, and we
-     don't want to depend on pthread_once here, so hack it. */
-  ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack");
-  if (local_running_on_valgrind == -1)
-    running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
-  return local_running_on_valgrind;
-}
-
-#endif  /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
-
-/* See the comments in dynamic_annotations.h */
-double ValgrindSlowdown(void) {
-  /* Same initialization hack as in RunningOnValgrind(). */
-  static volatile double slowdown = 0.0;
-  double local_slowdown = slowdown;
-  ANNOTATE_BENIGN_RACE(&slowdown, "safe hack");
-  if (RunningOnValgrind() == 0) {
-    return 1.0;
-  }
-  if (local_slowdown == 0.0) {
-    char *env = getenv("VALGRIND_SLOWDOWN");
-    slowdown = local_slowdown = env ? atof(env) : 50.0;
-  }
-  return local_slowdown;
-}
diff --git a/contrib/libtcmalloc/src/base/dynamic_annotations.h b/contrib/libtcmalloc/src/base/dynamic_annotations.h
deleted file mode 100644
index 4669315ced3..00000000000
--- a/contrib/libtcmalloc/src/base/dynamic_annotations.h
+++ /dev/null
@@ -1,627 +0,0 @@
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Kostya Serebryany
- */
-
-/* This file defines dynamic annotations for use with dynamic analysis
-   tool such as valgrind, PIN, etc.
-
-   Dynamic annotation is a source code annotation that affects
-   the generated code (that is, the annotation is not a comment).
-   Each such annotation is attached to a particular
-   instruction and/or to a particular object (address) in the program.
-
-   The annotations that should be used by users are macros in all upper-case
-   (e.g., ANNOTATE_NEW_MEMORY).
-
-   Actual implementation of these macros may differ depending on the
-   dynamic analysis tool being used.
-
-   See http://code.google.com/p/data-race-test/  for more information.
-
-   This file supports the following dynamic analysis tools:
-   - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero).
-      Macros are defined empty.
-   - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1).
-      Macros are defined as calls to non-inlinable empty functions
-      that are intercepted by Valgrind. */
-
-#ifndef BASE_DYNAMIC_ANNOTATIONS_H_
-#define BASE_DYNAMIC_ANNOTATIONS_H_
-
-#ifndef DYNAMIC_ANNOTATIONS_ENABLED
-# define DYNAMIC_ANNOTATIONS_ENABLED 0
-#endif
-
-#if DYNAMIC_ANNOTATIONS_ENABLED != 0
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing condition variables such as CondVar,
-     using conditional critical sections (Await/LockWhen) and when constructing
-     user-defined synchronization mechanisms.
-
-     The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can
-     be used to define happens-before arcs in user-defined synchronization
-     mechanisms:  the race detector will infer an arc from the former to the
-     latter when they share the same argument pointer.
-
-     Example 1 (reference counting):
-
-     void Unref() {
-       ANNOTATE_HAPPENS_BEFORE(&refcount_);
-       if (AtomicDecrementByOne(&refcount_) == 0) {
-         ANNOTATE_HAPPENS_AFTER(&refcount_);
-         delete this;
-       }
-     }
-
-     Example 2 (message queue):
-
-     void MyQueue::Put(Type *e) {
-       MutexLock lock(&mu_);
-       ANNOTATE_HAPPENS_BEFORE(e);
-       PutElementIntoMyQueue(e);
-     }
-
-     Type *MyQueue::Get() {
-       MutexLock lock(&mu_);
-       Type *e = GetElementFromMyQueue();
-       ANNOTATE_HAPPENS_AFTER(e);
-       return e;
-     }
-
-     Note: when possible, please use the existing reference counting and message
-     queue implementations instead of inventing new ones. */
-
-  /* Report that wait on the condition variable at address "cv" has succeeded
-     and the lock at address "lock" is held. */
-  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \
-    AnnotateCondVarWait(__FILE__, __LINE__, cv, lock)
-
-  /* Report that wait on the condition variable at "cv" has succeeded.  Variant
-     w/o lock. */
-  #define ANNOTATE_CONDVAR_WAIT(cv) \
-    AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL)
-
-  /* Report that we are about to signal on the condition variable at address
-     "cv". */
-  #define ANNOTATE_CONDVAR_SIGNAL(cv) \
-    AnnotateCondVarSignal(__FILE__, __LINE__, cv)
-
-  /* Report that we are about to signal_all on the condition variable at "cv". */
-  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \
-    AnnotateCondVarSignalAll(__FILE__, __LINE__, cv)
-
-  /* Annotations for user-defined synchronization mechanisms. */
-  #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj)
-  #define ANNOTATE_HAPPENS_AFTER(obj)  ANNOTATE_CONDVAR_WAIT(obj)
-
-  /* Report that the bytes in the range [pointer, pointer+size) are about
-     to be published safely. The race checker will create a happens-before
-     arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to
-     subsequent accesses to this memory.
-     Note: this annotation may not work properly if the race detector uses
-     sampling, i.e. does not observe all memory accesses.
-     */
-  #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \
-    AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size)
-
-  /* DEPRECATED. Don't use it. */
-  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \
-    AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size)
-
-  /* DEPRECATED. Don't use it. */
-  #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size)   \
-    do {                                              \
-      ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \
-      ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size);   \
-    } while (0)
-
-  /* Instruct the tool to create a happens-before arc between mu->Unlock() and
-     mu->Lock(). This annotation may slow down the race detector and hide real
-     races. Normally it is used only when it would be difficult to annotate each
-     of the mutex's critical sections individually using the annotations above.
-     This annotation makes sense only for hybrid race detectors. For pure
-     happens-before detectors this is a no-op. For more details see
-     http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */
-  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
-    AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
-
-  /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */
-  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \
-    AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
-
-  /* -------------------------------------------------------------
-     Annotations useful when defining memory allocators, or when memory that
-     was protected in one way starts to be protected in another. */
-
-  /* Report that a new memory at "address" of size "size" has been allocated.
-     This might be used when the memory has been retrieved from a free list and
-     is about to be reused, or when a the locking discipline for a variable
-     changes. */
-  #define ANNOTATE_NEW_MEMORY(address, size) \
-    AnnotateNewMemory(__FILE__, __LINE__, address, size)
-
-  /* -------------------------------------------------------------
-     Annotations useful when defining FIFO queues that transfer data between
-     threads. */
-
-  /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at
-     address "pcq" has been created.  The ANNOTATE_PCQ_* annotations
-     should be used only for FIFO queues.  For non-FIFO queues use
-     ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */
-  #define ANNOTATE_PCQ_CREATE(pcq) \
-    AnnotatePCQCreate(__FILE__, __LINE__, pcq)
-
-  /* Report that the queue at address "pcq" is about to be destroyed. */
-  #define ANNOTATE_PCQ_DESTROY(pcq) \
-    AnnotatePCQDestroy(__FILE__, __LINE__, pcq)
-
-  /* Report that we are about to put an element into a FIFO queue at address
-     "pcq". */
-  #define ANNOTATE_PCQ_PUT(pcq) \
-    AnnotatePCQPut(__FILE__, __LINE__, pcq)
-
-  /* Report that we've just got an element from a FIFO queue at address "pcq". */
-  #define ANNOTATE_PCQ_GET(pcq) \
-    AnnotatePCQGet(__FILE__, __LINE__, pcq)
-
-  /* -------------------------------------------------------------
-     Annotations that suppress errors.  It is usually better to express the
-     program's synchronization using the other annotations, but these can
-     be used when all else fails. */
-
-  /* Report that we may have a benign race at "pointer", with size
-     "sizeof(*(pointer))". "pointer" must be a non-void* pointer.  Insert at the
-     point where "pointer" has been allocated, preferably close to the point
-     where the race happens.  See also ANNOTATE_BENIGN_RACE_STATIC. */
-  #define ANNOTATE_BENIGN_RACE(pointer, description) \
-    AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \
-                            sizeof(*(pointer)), description)
-
-  /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to
-     the memory range [address, address+size). */
-  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \
-    AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description)
-
-  /* Request the analysis tool to ignore all reads in the current thread
-     until ANNOTATE_IGNORE_READS_END is called.
-     Useful to ignore intentional racey reads, while still checking
-     other reads and all writes.
-     See also ANNOTATE_UNPROTECTED_READ. */
-  #define ANNOTATE_IGNORE_READS_BEGIN() \
-    AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
-
-  /* Stop ignoring reads. */
-  #define ANNOTATE_IGNORE_READS_END() \
-    AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
-
-  /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */
-  #define ANNOTATE_IGNORE_WRITES_BEGIN() \
-    AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
-
-  /* Stop ignoring writes. */
-  #define ANNOTATE_IGNORE_WRITES_END() \
-    AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
-
-  /* Start ignoring all memory accesses (reads and writes). */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
-    do {\
-      ANNOTATE_IGNORE_READS_BEGIN();\
-      ANNOTATE_IGNORE_WRITES_BEGIN();\
-    }while(0)\
-
-  /* Stop ignoring all memory accesses. */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
-    do {\
-      ANNOTATE_IGNORE_WRITES_END();\
-      ANNOTATE_IGNORE_READS_END();\
-    }while(0)\
-
-  /* Enable (enable!=0) or disable (enable==0) race detection for all threads.
-     This annotation could be useful if you want to skip expensive race analysis
-     during some period of program execution, e.g. during initialization. */
-  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \
-    AnnotateEnableRaceDetection(__FILE__, __LINE__, enable)
-
-  /* -------------------------------------------------------------
-     Annotations useful for debugging. */
-
-  /* Request to trace every access to "address". */
-  #define ANNOTATE_TRACE_MEMORY(address) \
-    AnnotateTraceMemory(__FILE__, __LINE__, address)
-
-  /* Report the current thread name to a race detector. */
-  #define ANNOTATE_THREAD_NAME(name) \
-    AnnotateThreadName(__FILE__, __LINE__, name)
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing locks.  They are not
-     normally needed by modules that merely use locks.
-     The "lock" argument is a pointer to the lock object. */
-
-  /* Report that a lock has been created at address "lock". */
-  #define ANNOTATE_RWLOCK_CREATE(lock) \
-    AnnotateRWLockCreate(__FILE__, __LINE__, lock)
-
-  /* Report that the lock at address "lock" is about to be destroyed. */
-  #define ANNOTATE_RWLOCK_DESTROY(lock) \
-    AnnotateRWLockDestroy(__FILE__, __LINE__, lock)
-
-  /* Report that the lock at address "lock" has been acquired.
-     is_w=1 for writer lock, is_w=0 for reader lock. */
-  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \
-    AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w)
-
-  /* Report that the lock at address "lock" is about to be released. */
-  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \
-    AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w)
-
-  /* -------------------------------------------------------------
-     Annotations useful when implementing barriers.  They are not
-     normally needed by modules that merely use barriers.
-     The "barrier" argument is a pointer to the barrier object. */
-
-  /* Report that the "barrier" has been initialized with initial "count".
-   If 'reinitialization_allowed' is true, initialization is allowed to happen
-   multiple times w/o calling barrier_destroy() */
-  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \
-    AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \
-                        reinitialization_allowed)
-
-  /* Report that we are about to enter barrier_wait("barrier"). */
-  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \
-    AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier)
-
-  /* Report that we just exited barrier_wait("barrier"). */
-  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \
-    AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier)
-
-  /* Report that the "barrier" has been destroyed. */
-  #define ANNOTATE_BARRIER_DESTROY(barrier) \
-    AnnotateBarrierDestroy(__FILE__, __LINE__, barrier)
-
-  /* -------------------------------------------------------------
-     Annotations useful for testing race detectors. */
-
-  /* Report that we expect a race on the variable at "address".
-     Use only in unit tests for a race detector. */
-  #define ANNOTATE_EXPECT_RACE(address, description) \
-    AnnotateExpectRace(__FILE__, __LINE__, address, description)
-
-  /* A no-op. Insert where you like to test the interceptors. */
-  #define ANNOTATE_NO_OP(arg) \
-    AnnotateNoOp(__FILE__, __LINE__, arg)
-
-  /* Force the race detector to flush its state. The actual effect depends on
-   * the implementation of the detector. */
-  #define ANNOTATE_FLUSH_STATE() \
-    AnnotateFlushState(__FILE__, __LINE__)
-
-
-#else  /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
-
-  #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */
-  #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */
-  #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */
-  #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */
-  #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */
-  #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */
-  #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */
-  #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */
-  #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */
-  #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */
-  #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */
-  #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */
-  #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */
-  #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */
-  #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */
-  #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size)  /* empty */
-  #define ANNOTATE_SWAP_MEMORY_RANGE(address, size)  /* empty */
-  #define ANNOTATE_PCQ_CREATE(pcq) /* empty */
-  #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */
-  #define ANNOTATE_PCQ_PUT(pcq) /* empty */
-  #define ANNOTATE_PCQ_GET(pcq) /* empty */
-  #define ANNOTATE_NEW_MEMORY(address, size) /* empty */
-  #define ANNOTATE_EXPECT_RACE(address, description) /* empty */
-  #define ANNOTATE_BENIGN_RACE(address, description) /* empty */
-  #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */
-  #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */
-  #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */
-  #define ANNOTATE_TRACE_MEMORY(arg) /* empty */
-  #define ANNOTATE_THREAD_NAME(name) /* empty */
-  #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_READS_END() /* empty */
-  #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_WRITES_END() /* empty */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */
-  #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */
-  #define ANNOTATE_NO_OP(arg) /* empty */
-  #define ANNOTATE_FLUSH_STATE() /* empty */
-
-#endif  /* DYNAMIC_ANNOTATIONS_ENABLED */
-
-/* Macro definitions for GCC attributes that allow static thread safety
-   analysis to recognize and use some of the dynamic annotations as
-   escape hatches.
-   TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the
-   default crosstool/GCC supports these GCC attributes.  */
-
-#define ANNOTALYSIS_STATIC_INLINE
-#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ;
-#define ANNOTALYSIS_IGNORE_READS_BEGIN
-#define ANNOTALYSIS_IGNORE_READS_END
-#define ANNOTALYSIS_IGNORE_WRITES_BEGIN
-#define ANNOTALYSIS_IGNORE_WRITES_END
-#define ANNOTALYSIS_UNPROTECTED_READ
-
-#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \
-    defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
-
-#if DYNAMIC_ANNOTATIONS_ENABLED == 0
-#define ANNOTALYSIS_ONLY 1
-#undef ANNOTALYSIS_STATIC_INLINE
-#define ANNOTALYSIS_STATIC_INLINE static inline
-#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; }
-#endif
-
-/* Only emit attributes when annotalysis is enabled. */
-#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
-#undef  ANNOTALYSIS_IGNORE_READS_BEGIN
-#define ANNOTALYSIS_IGNORE_READS_BEGIN  __attribute__ ((ignore_reads_begin))
-#undef  ANNOTALYSIS_IGNORE_READS_END
-#define ANNOTALYSIS_IGNORE_READS_END    __attribute__ ((ignore_reads_end))
-#undef  ANNOTALYSIS_IGNORE_WRITES_BEGIN
-#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin))
-#undef  ANNOTALYSIS_IGNORE_WRITES_END
-#define ANNOTALYSIS_IGNORE_WRITES_END   __attribute__ ((ignore_writes_end))
-#undef  ANNOTALYSIS_UNPROTECTED_READ
-#define ANNOTALYSIS_UNPROTECTED_READ    __attribute__ ((unprotected_read))
-#endif
-
-#endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__))
-
-/* Use the macros above rather than using these functions directly. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-void AnnotateRWLockCreate(const char *file, int line,
-                          const volatile void *lock);
-void AnnotateRWLockDestroy(const char *file, int line,
-                           const volatile void *lock);
-void AnnotateRWLockAcquired(const char *file, int line,
-                            const volatile void *lock, long is_w);
-void AnnotateRWLockReleased(const char *file, int line,
-                            const volatile void *lock, long is_w);
-void AnnotateBarrierInit(const char *file, int line,
-                         const volatile void *barrier, long count,
-                         long reinitialization_allowed);
-void AnnotateBarrierWaitBefore(const char *file, int line,
-                               const volatile void *barrier);
-void AnnotateBarrierWaitAfter(const char *file, int line,
-                              const volatile void *barrier);
-void AnnotateBarrierDestroy(const char *file, int line,
-                            const volatile void *barrier);
-void AnnotateCondVarWait(const char *file, int line,
-                         const volatile void *cv,
-                         const volatile void *lock);
-void AnnotateCondVarSignal(const char *file, int line,
-                           const volatile void *cv);
-void AnnotateCondVarSignalAll(const char *file, int line,
-                              const volatile void *cv);
-void AnnotatePublishMemoryRange(const char *file, int line,
-                                const volatile void *address,
-                                long size);
-void AnnotateUnpublishMemoryRange(const char *file, int line,
-                                  const volatile void *address,
-                                  long size);
-void AnnotatePCQCreate(const char *file, int line,
-                       const volatile void *pcq);
-void AnnotatePCQDestroy(const char *file, int line,
-                        const volatile void *pcq);
-void AnnotatePCQPut(const char *file, int line,
-                    const volatile void *pcq);
-void AnnotatePCQGet(const char *file, int line,
-                    const volatile void *pcq);
-void AnnotateNewMemory(const char *file, int line,
-                       const volatile void *address,
-                       long size);
-void AnnotateExpectRace(const char *file, int line,
-                        const volatile void *address,
-                        const char *description);
-void AnnotateBenignRace(const char *file, int line,
-                        const volatile void *address,
-                        const char *description);
-void AnnotateBenignRaceSized(const char *file, int line,
-                        const volatile void *address,
-                        long size,
-                        const char *description);
-void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
-                                  const volatile void *mu);
-void AnnotateTraceMemory(const char *file, int line,
-                         const volatile void *arg);
-void AnnotateThreadName(const char *file, int line,
-                        const char *name);
-ANNOTALYSIS_STATIC_INLINE
-void AnnotateIgnoreReadsBegin(const char *file, int line)
-    ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-ANNOTALYSIS_STATIC_INLINE
-void AnnotateIgnoreReadsEnd(const char *file, int line)
-    ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-ANNOTALYSIS_STATIC_INLINE
-void AnnotateIgnoreWritesBegin(const char *file, int line)
-    ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-ANNOTALYSIS_STATIC_INLINE
-void AnnotateIgnoreWritesEnd(const char *file, int line)
-    ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-void AnnotateEnableRaceDetection(const char *file, int line, int enable);
-void AnnotateNoOp(const char *file, int line,
-                  const volatile void *arg);
-void AnnotateFlushState(const char *file, int line);
-
-/* Return non-zero value if running under valgrind.
-
-  If "valgrind.h" is included into dynamic_annotations.c,
-  the regular valgrind mechanism will be used.
-  See http://valgrind.org/docs/manual/manual-core-adv.html about
-  RUNNING_ON_VALGRIND and other valgrind "client requests".
-  The file "valgrind.h" may be obtained by doing
-     svn co svn://svn.valgrind.org/valgrind/trunk/include
-
-  If for some reason you can't use "valgrind.h" or want to fake valgrind,
-  there are two ways to make this function return non-zero:
-    - Use environment variable: export RUNNING_ON_VALGRIND=1
-    - Make your tool intercept the function RunningOnValgrind() and
-      change its return value.
- */
-int RunningOnValgrind(void);
-
-/* ValgrindSlowdown returns:
-    * 1.0, if (RunningOnValgrind() == 0)
-    * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL)
-    * atof(getenv("VALGRIND_SLOWDOWN")) otherwise
-   This function can be used to scale timeout values:
-   EXAMPLE:
-   for (;;) {
-     DoExpensiveBackgroundTask();
-     SleepForSeconds(5 * ValgrindSlowdown());
-   }
- */
-double ValgrindSlowdown(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
-
-  /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
-
-     Instead of doing
-        ANNOTATE_IGNORE_READS_BEGIN();
-        ... = x;
-        ANNOTATE_IGNORE_READS_END();
-     one can use
-        ... = ANNOTATE_UNPROTECTED_READ(x); */
-  template <class T>
-  inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
-      ANNOTALYSIS_UNPROTECTED_READ {
-    ANNOTATE_IGNORE_READS_BEGIN();
-    T res = x;
-    ANNOTATE_IGNORE_READS_END();
-    return res;
-  }
-  /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
-  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)        \
-    namespace {                                                       \
-      class static_var ## _annotator {                                \
-       public:                                                        \
-        static_var ## _annotator() {                                  \
-          ANNOTATE_BENIGN_RACE_SIZED(&static_var,                     \
-                                      sizeof(static_var),             \
-            # static_var ": " description);                           \
-        }                                                             \
-      };                                                              \
-      static static_var ## _annotator the ## static_var ## _annotator;\
-    }
-#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
-
-  #define ANNOTATE_UNPROTECTED_READ(x) (x)
-  #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description)  /* empty */
-
-#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
-
-/* Annotalysis, a GCC based static analyzer, is able to understand and use
-   some of the dynamic annotations defined in this file. However, dynamic
-   annotations are usually disabled in the opt mode (to avoid additional
-   runtime overheads) while Annotalysis only works in the opt mode.
-   In order for Annotalysis to use these dynamic annotations when they
-   are disabled, we re-define these annotations here. Note that unlike the
-   original macro definitions above, these macros are expanded to calls to
-   static inline functions so that the compiler will be able to remove the
-   calls after the analysis. */
-
-#ifdef ANNOTALYSIS_ONLY
-
-  #undef ANNOTALYSIS_ONLY
-
-  /* Undefine and re-define the macros that the static analyzer understands. */
-  #undef ANNOTATE_IGNORE_READS_BEGIN
-  #define ANNOTATE_IGNORE_READS_BEGIN()           \
-    AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
-
-  #undef ANNOTATE_IGNORE_READS_END
-  #define ANNOTATE_IGNORE_READS_END()             \
-    AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
-
-  #undef ANNOTATE_IGNORE_WRITES_BEGIN
-  #define ANNOTATE_IGNORE_WRITES_BEGIN()          \
-    AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
-
-  #undef ANNOTATE_IGNORE_WRITES_END
-  #define ANNOTATE_IGNORE_WRITES_END()            \
-    AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
-
-  #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN()       \
-    do {                                                 \
-      ANNOTATE_IGNORE_READS_BEGIN();                     \
-      ANNOTATE_IGNORE_WRITES_BEGIN();                    \
-    }while(0)                                            \
-
-  #undef ANNOTATE_IGNORE_READS_AND_WRITES_END
-  #define ANNOTATE_IGNORE_READS_AND_WRITES_END()  \
-    do {                                          \
-      ANNOTATE_IGNORE_WRITES_END();               \
-      ANNOTATE_IGNORE_READS_END();                \
-    }while(0)                                     \
-
-  #if defined(__cplusplus)
-    #undef ANNOTATE_UNPROTECTED_READ
-    template <class T>
-    inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
-         ANNOTALYSIS_UNPROTECTED_READ {
-      ANNOTATE_IGNORE_READS_BEGIN();
-      T res = x;
-      ANNOTATE_IGNORE_READS_END();
-      return res;
-    }
-  #endif /* __cplusplus */
-
-#endif /* ANNOTALYSIS_ONLY */
-
-/* Undefine the macros intended only in this file. */
-#undef ANNOTALYSIS_STATIC_INLINE
-#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
-
-#endif  /* BASE_DYNAMIC_ANNOTATIONS_H_ */
diff --git a/contrib/libtcmalloc/src/base/elf_mem_image.cc b/contrib/libtcmalloc/src/base/elf_mem_image.cc
deleted file mode 100644
index d9605609e3a..00000000000
--- a/contrib/libtcmalloc/src/base/elf_mem_image.cc
+++ /dev/null
@@ -1,443 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in an in-memory Elf image.
-//
-
-#include "base/elf_mem_image.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
-
-#include <stddef.h>   // for size_t, ptrdiff_t
-#include "base/logging.h"
-
-// From binutils/include/elf/common.h (this doesn't appear to be documented
-// anywhere else).
-//
-//   /* This flag appears in a Versym structure.  It means that the symbol
-//      is hidden, and is only visible with an explicit version number.
-//      This is a GNU extension.  */
-//   #define VERSYM_HIDDEN           0x8000
-//
-//   /* This is the mask for the rest of the Versym information.  */
-//   #define VERSYM_VERSION          0x7fff
-
-#define VERSYM_VERSION 0x7fff
-
-#if __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-const-variable"
-#endif
-
-namespace base {
-
-namespace {
-template <int N> class ElfClass {
- public:
-  static const int kElfClass = -1;
-  static int ElfBind(const ElfW(Sym) *) {
-    CHECK(false); // << "Unexpected word size";
-    return 0;
-  }
-  static int ElfType(const ElfW(Sym) *) {
-    CHECK(false); // << "Unexpected word size";
-    return 0;
-  }
-};
-
-template <> class ElfClass<32> {
- public:
-  static const int kElfClass = ELFCLASS32;
-  static int ElfBind(const ElfW(Sym) *symbol) {
-    return ELF32_ST_BIND(symbol->st_info);
-  }
-  static int ElfType(const ElfW(Sym) *symbol) {
-    return ELF32_ST_TYPE(symbol->st_info);
-  }
-};
-
-template <> class ElfClass<64> {
- public:
-  static const int kElfClass = ELFCLASS64;
-  static int ElfBind(const ElfW(Sym) *symbol) {
-    return ELF64_ST_BIND(symbol->st_info);
-  }
-  static int ElfType(const ElfW(Sym) *symbol) {
-    return ELF64_ST_TYPE(symbol->st_info);
-  }
-};
-
-typedef ElfClass<__WORDSIZE> CurrentElfClass;
-
-// Extract an element from one of the ELF tables, cast it to desired type.
-// This is just a simple arithmetic and a glorified cast.
-// Callers are responsible for bounds checking.
-template <class T>
-const T* GetTableElement(const ElfW(Ehdr) *ehdr,
-                         ElfW(Off) table_offset,
-                         ElfW(Word) element_size,
-                         size_t index) {
-  return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
-                                    + table_offset
-                                    + index * element_size);
-}
-}  // namespace
-
-const void *const ElfMemImage::kInvalidBase =
-    reinterpret_cast<const void *>(~0L);
-
-ElfMemImage::ElfMemImage(const void *base) {
-  CHECK(base != kInvalidBase);
-  Init(base);
-}
-
-int ElfMemImage::GetNumSymbols() const {
-  if (!hash_) {
-    return 0;
-  }
-  // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
-  return hash_[1];
-}
-
-const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
-  CHECK_LT(index, GetNumSymbols());
-  return dynsym_ + index;
-}
-
-const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
-  CHECK_LT(index, GetNumSymbols());
-  return versym_ + index;
-}
-
-const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
-  CHECK_LT(index, ehdr_->e_phnum);
-  return GetTableElement<ElfW(Phdr)>(ehdr_,
-                                     ehdr_->e_phoff,
-                                     ehdr_->e_phentsize,
-                                     index);
-}
-
-const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
-  CHECK_LT(offset, strsize_);
-  return dynstr_ + offset;
-}
-
-const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
-  if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
-    // Symbol corresponds to "special" (e.g. SHN_ABS) section.
-    return reinterpret_cast<const void *>(sym->st_value);
-  }
-  CHECK_LT(link_base_, sym->st_value);
-  return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
-}
-
-const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
-  CHECK_LE(index, verdefnum_);
-  const ElfW(Verdef) *version_definition = verdef_;
-  while (version_definition->vd_ndx < index && version_definition->vd_next) {
-    const char *const version_definition_as_char =
-        reinterpret_cast<const char *>(version_definition);
-    version_definition =
-        reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
-                                               version_definition->vd_next);
-  }
-  return version_definition->vd_ndx == index ? version_definition : NULL;
-}
-
-const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
-    const ElfW(Verdef) *verdef) const {
-  return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
-}
-
-const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
-  CHECK_LT(offset, strsize_);
-  return dynstr_ + offset;
-}
-
-void ElfMemImage::Init(const void *base) {
-  ehdr_      = NULL;
-  dynsym_    = NULL;
-  dynstr_    = NULL;
-  versym_    = NULL;
-  verdef_    = NULL;
-  hash_      = NULL;
-  strsize_   = 0;
-  verdefnum_ = 0;
-  link_base_ = ~0L;  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
-  if (!base) {
-    return;
-  }
-  const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base);
-  // Fake VDSO has low bit set.
-  const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
-  base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
-  const char *const base_as_char = reinterpret_cast<const char *>(base);
-  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
-      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
-    RAW_DCHECK(false, "no ELF magic"); // at %p", base);
-    return;
-  }
-  int elf_class = base_as_char[EI_CLASS];
-  if (elf_class != CurrentElfClass::kElfClass) {
-    DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
-    return;
-  }
-  switch (base_as_char[EI_DATA]) {
-    case ELFDATA2LSB: {
-      if (__LITTLE_ENDIAN != __BYTE_ORDER) {
-        DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
-        return;
-      }
-      break;
-    }
-    case ELFDATA2MSB: {
-      if (__BIG_ENDIAN != __BYTE_ORDER) {
-        DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
-        return;
-      }
-      break;
-    }
-    default: {
-      RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
-      return;
-    }
-  }
-
-  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
-  const ElfW(Phdr) *dynamic_program_header = NULL;
-  for (int i = 0; i < ehdr_->e_phnum; ++i) {
-    const ElfW(Phdr) *const program_header = GetPhdr(i);
-    switch (program_header->p_type) {
-      case PT_LOAD:
-        if (link_base_ == ~0L) {
-          link_base_ = program_header->p_vaddr;
-        }
-        break;
-      case PT_DYNAMIC:
-        dynamic_program_header = program_header;
-        break;
-    }
-  }
-  if (link_base_ == ~0L || !dynamic_program_header) {
-    RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
-    RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
-    // Mark this image as not present. Can not recur infinitely.
-    Init(0);
-    return;
-  }
-  ptrdiff_t relocation =
-      base_as_char - reinterpret_cast<const char *>(link_base_);
-  ElfW(Dyn) *dynamic_entry =
-      reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
-                                    relocation);
-  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
-    ElfW(Xword) value = dynamic_entry->d_un.d_val;
-    if (fake_vdso) {
-      // A complication: in the real VDSO, dynamic entries are not relocated
-      // (it wasn't loaded by a dynamic loader). But when testing with a
-      // "fake" dlopen()ed vdso library, the loader relocates some (but
-      // not all!) of them before we get here.
-      if (dynamic_entry->d_tag == DT_VERDEF) {
-        // The only dynamic entry (of the ones we care about) libc-2.3.6
-        // loader doesn't relocate.
-        value += relocation;
-      }
-    } else {
-      // Real VDSO. Everything needs to be relocated.
-      value += relocation;
-    }
-    switch (dynamic_entry->d_tag) {
-      case DT_HASH:
-        hash_ = reinterpret_cast<ElfW(Word) *>(value);
-        break;
-      case DT_SYMTAB:
-        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
-        break;
-      case DT_STRTAB:
-        dynstr_ = reinterpret_cast<const char *>(value);
-        break;
-      case DT_VERSYM:
-        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
-        break;
-      case DT_VERDEF:
-        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
-        break;
-      case DT_VERDEFNUM:
-        verdefnum_ = dynamic_entry->d_un.d_val;
-        break;
-      case DT_STRSZ:
-        strsize_ = dynamic_entry->d_un.d_val;
-        break;
-      default:
-        // Unrecognized entries explicitly ignored.
-        break;
-    }
-  }
-  if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
-      !verdef_ || !verdefnum_ || !strsize_) {
-    RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
-    RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
-    RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
-    RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
-    RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
-    RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
-    RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
-    // Mark this image as not present. Can not recur infinitely.
-    Init(0);
-    return;
-  }
-}
-
-bool ElfMemImage::LookupSymbol(const char *name,
-                               const char *version,
-                               int type,
-                               SymbolInfo *info) const {
-  for (SymbolIterator it = begin(); it != end(); ++it) {
-    if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
-        CurrentElfClass::ElfType(it->symbol) == type) {
-      if (info) {
-        *info = *it;
-      }
-      return true;
-    }
-  }
-  return false;
-}
-
-bool ElfMemImage::LookupSymbolByAddress(const void *address,
-                                        SymbolInfo *info_out) const {
-  for (SymbolIterator it = begin(); it != end(); ++it) {
-    const char *const symbol_start =
-        reinterpret_cast<const char *>(it->address);
-    const char *const symbol_end = symbol_start + it->symbol->st_size;
-    if (symbol_start <= address && address < symbol_end) {
-      if (info_out) {
-        // Client wants to know details for that symbol (the usual case).
-        if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
-          // Strong symbol; just return it.
-          *info_out = *it;
-          return true;
-        } else {
-          // Weak or local. Record it, but keep looking for a strong one.
-          *info_out = *it;
-        }
-      } else {
-        // Client only cares if there is an overlapping symbol.
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
-    : index_(index), image_(image) {
-}
-
-const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
-  return &info_;
-}
-
-const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
-  return info_;
-}
-
-bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
-  return this->image_ == rhs.image_ && this->index_ == rhs.index_;
-}
-
-bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
-  return !(*this == rhs);
-}
-
-ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
-  this->Update(1);
-  return *this;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::begin() const {
-  SymbolIterator it(this, 0);
-  it.Update(0);
-  return it;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::end() const {
-  return SymbolIterator(this, GetNumSymbols());
-}
-
-void ElfMemImage::SymbolIterator::Update(int increment) {
-  const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
-  CHECK(image->IsPresent() || increment == 0);
-  if (!image->IsPresent()) {
-    return;
-  }
-  index_ += increment;
-  if (index_ >= image->GetNumSymbols()) {
-    index_ = image->GetNumSymbols();
-    return;
-  }
-  const ElfW(Sym)    *symbol = image->GetDynsym(index_);
-  const ElfW(Versym) *version_symbol = image->GetVersym(index_);
-  CHECK(symbol && version_symbol);
-  const char *const symbol_name = image->GetDynstr(symbol->st_name);
-  const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
-  const ElfW(Verdef) *version_definition = NULL;
-  const char *version_name = "";
-  if (symbol->st_shndx == SHN_UNDEF) {
-    // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
-    // version_index could well be greater than verdefnum_, so calling
-    // GetVerdef(version_index) may trigger assertion.
-  } else {
-    version_definition = image->GetVerdef(version_index);
-  }
-  if (version_definition) {
-    // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
-    // optional 2nd if the version has a parent.
-    CHECK_LE(1, version_definition->vd_cnt);
-    CHECK_LE(version_definition->vd_cnt, 2);
-    const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
-    version_name = image->GetVerstr(version_aux->vda_name);
-  }
-  info_.name    = symbol_name;
-  info_.version = version_name;
-  info_.address = image->GetSymAddr(symbol);
-  info_.symbol  = symbol;
-}
-
-}  // namespace base
-
-#if __clang__
-#pragma clang diagnostic pop
-#endif
-
-#endif  // HAVE_ELF_MEM_IMAGE
diff --git a/contrib/libtcmalloc/src/base/elf_mem_image.h b/contrib/libtcmalloc/src/base/elf_mem_image.h
deleted file mode 100644
index df63cf8b4da..00000000000
--- a/contrib/libtcmalloc/src/base/elf_mem_image.h
+++ /dev/null
@@ -1,135 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup for in-memory Elf images.
-
-#ifndef BASE_ELF_MEM_IMAGE_H_
-#define BASE_ELF_MEM_IMAGE_H_
-
-#include "../config.h"
-#ifdef HAVE_FEATURES_H
-#include <features.h>   // for __GLIBC__
-#endif
-
-// Maybe one day we can rewrite this file not to require the elf
-// symbol extensions in glibc, but for right now we need them.
-#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)
-
-#define HAVE_ELF_MEM_IMAGE 1
-
-#include <stdlib.h>
-#include <link.h>  // for ElfW
-
-namespace base {
-
-// An in-memory ELF image (may not exist on disk).
-class ElfMemImage {
- public:
-  // Sentinel: there could never be an elf image at this address.
-  static const void *const kInvalidBase;
-
-  // Information about a single vdso symbol.
-  // All pointers are into .dynsym, .dynstr, or .text of the VDSO.
-  // Do not free() them or modify through them.
-  struct SymbolInfo {
-    const char      *name;      // E.g. "__vdso_getcpu"
-    const char      *version;   // E.g. "LINUX_2.6", could be ""
-                                // for unversioned symbol.
-    const void      *address;   // Relocated symbol address.
-    const ElfW(Sym) *symbol;    // Symbol in the dynamic symbol table.
-  };
-
-  // Supports iteration over all dynamic symbols.
-  class SymbolIterator {
-   public:
-    friend class ElfMemImage;
-    const SymbolInfo *operator->() const;
-    const SymbolInfo &operator*() const;
-    SymbolIterator& operator++();
-    bool operator!=(const SymbolIterator &rhs) const;
-    bool operator==(const SymbolIterator &rhs) const;
-   private:
-    SymbolIterator(const void *const image, int index);
-    void Update(int incr);
-    SymbolInfo info_;
-    int index_;
-    const void *const image_;
-  };
-
-
-  explicit ElfMemImage(const void *base);
-  void                 Init(const void *base);
-  bool                 IsPresent() const { return ehdr_ != NULL; }
-  const ElfW(Phdr)*    GetPhdr(int index) const;
-  const ElfW(Sym)*     GetDynsym(int index) const;
-  const ElfW(Versym)*  GetVersym(int index) const;
-  const ElfW(Verdef)*  GetVerdef(int index) const;
-  const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
-  const char*          GetDynstr(ElfW(Word) offset) const;
-  const void*          GetSymAddr(const ElfW(Sym) *sym) const;
-  const char*          GetVerstr(ElfW(Word) offset) const;
-  int                  GetNumSymbols() const;
-
-  SymbolIterator begin() const;
-  SymbolIterator end() const;
-
-  // Look up versioned dynamic symbol in the image.
-  // Returns false if image is not present, or doesn't contain given
-  // symbol/version/type combination.
-  // If info_out != NULL, additional details are filled in.
-  bool LookupSymbol(const char *name, const char *version,
-                    int symbol_type, SymbolInfo *info_out) const;
-
-  // Find info about symbol (if any) which overlaps given address.
-  // Returns true if symbol was found; false if image isn't present
-  // or doesn't have a symbol overlapping given address.
-  // If info_out != NULL, additional details are filled in.
-  bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
-
- private:
-  const ElfW(Ehdr) *ehdr_;
-  const ElfW(Sym) *dynsym_;
-  const ElfW(Versym) *versym_;
-  const ElfW(Verdef) *verdef_;
-  const ElfW(Word) *hash_;
-  const char *dynstr_;
-  size_t strsize_;
-  size_t verdefnum_;
-  ElfW(Addr) link_base_;     // Link-time base (p_vaddr of first PT_LOAD).
-};
-
-}  // namespace base
-
-#endif  // __ELF__ and __GLIBC__ and !__native_client__
-
-#endif  // BASE_ELF_MEM_IMAGE_H_
diff --git a/contrib/libtcmalloc/src/base/elfcore.h b/contrib/libtcmalloc/src/base/elfcore.h
deleted file mode 100644
index 98fd23b6738..00000000000
--- a/contrib/libtcmalloc/src/base/elfcore.h
+++ /dev/null
@@ -1,401 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005-2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke, Carl Crous
- */
-
-#ifndef _ELFCORE_H
-#define _ELFCORE_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* We currently only support x86-32, x86-64, ARM, MIPS, PPC on Linux.
- * Porting to other related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-     defined(__mips__) || defined(__PPC__)) && defined(__linux)
-
-#include <stdarg.h>
-#include <stdint.h>
-#include <sys/types.h>
-#include "../config.h"
-
-
-/* Define the DUMPER symbol to make sure that there is exactly one
- * core dumper built into the library.
- */
-#define DUMPER "ELF"
-
-/* By the time that we get a chance to read CPU registers in the
- * calling thread, they are already in a not particularly useful
- * state. Besides, there will be multiple frames on the stack that are
- * just making the core file confusing. To fix this problem, we take a
- * snapshot of the frame pointer, stack pointer, and instruction
- * pointer at an earlier time, and then insert these values into the
- * core file.
- */
-
-#if defined(__i386__) || defined(__x86_64__)
-  typedef struct i386_regs {    /* Normal (non-FPU) CPU registers            */
-  #ifdef __x86_64__
-    #define BP rbp
-    #define SP rsp
-    #define IP rip
-    uint64_t  r15,r14,r13,r12,rbp,rbx,r11,r10;
-    uint64_t  r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
-    uint64_t  rip,cs,eflags;
-    uint64_t  rsp,ss;
-    uint64_t  fs_base, gs_base;
-    uint64_t  ds,es,fs,gs;
-  #else
-    #define BP ebp
-    #define SP esp
-    #define IP eip
-    uint32_t  ebx, ecx, edx, esi, edi, ebp, eax;
-    uint16_t  ds, __ds, es, __es;
-    uint16_t  fs, __fs, gs, __gs;
-    uint32_t  orig_eax, eip;
-    uint16_t  cs, __cs;
-    uint32_t  eflags, esp;
-    uint16_t  ss, __ss;
-  #endif
-  } i386_regs;
-#elif defined(__arm__)
-  typedef struct arm_regs {     /* General purpose registers                 */
-    #define BP uregs[11]        /* Frame pointer                             */
-    #define SP uregs[13]        /* Stack pointer                             */
-    #define IP uregs[15]        /* Program counter                           */
-    #define LR uregs[14]        /* Link register                             */
-    long uregs[18];
-  } arm_regs;
-#elif defined(__mips__)
-  typedef struct mips_regs {
-    unsigned long pad[6];       /* Unused padding to match kernel structures */
-    unsigned long uregs[32];    /* General purpose registers.                */
-    unsigned long hi;           /* Used for multiplication and division.     */
-    unsigned long lo;
-    unsigned long cp0_epc;      /* Program counter.                          */
-    unsigned long cp0_badvaddr;
-    unsigned long cp0_status;
-    unsigned long cp0_cause;
-    unsigned long unused;
-  } mips_regs;
-#elif defined (__PPC__)
-  typedef struct ppc_regs {
-    #define SP uregs[1]         /* Stack pointer                             */
-    #define IP rip              /* Program counter                           */
-    #define LR lr               /* Link register                             */
-    unsigned long uregs[32];	/* General Purpose Registers - r0-r31.       */
-    double        fpr[32];	/* Floating-Point Registers - f0-f31.        */
-    unsigned long rip;		/* Program counter.                          */
-    unsigned long msr;
-    unsigned long ccr;
-    unsigned long lr;
-    unsigned long ctr;
-    unsigned long xeq;
-    unsigned long mq;
-  } ppc_regs;
-#endif
-
-#if defined(__i386__) && defined(__GNUC__)
-  /* On x86 we provide an optimized version of the FRAME() macro, if the
-   * compiler supports a GCC-style asm() directive. This results in somewhat
-   * more accurate values for CPU registers.
-   */
-  typedef struct Frame {
-    struct i386_regs uregs;
-    int              errno_;
-    pid_t            tid;
-  } Frame;
-  #define FRAME(f) Frame f;                                           \
-                   do {                                               \
-                     f.errno_ = errno;                                \
-                     f.tid    = sys_gettid();                         \
-                     __asm__ volatile (                               \
-                       "push %%ebp\n"                                 \
-                       "push %%ebx\n"                                 \
-                       "mov  %%ebx,0(%%eax)\n"                        \
-                       "mov  %%ecx,4(%%eax)\n"                        \
-                       "mov  %%edx,8(%%eax)\n"                        \
-                       "mov  %%esi,12(%%eax)\n"                       \
-                       "mov  %%edi,16(%%eax)\n"                       \
-                       "mov  %%ebp,20(%%eax)\n"                       \
-                       "mov  %%eax,24(%%eax)\n"                       \
-                       "mov  %%ds,%%ebx\n"                            \
-                       "mov  %%ebx,28(%%eax)\n"                       \
-                       "mov  %%es,%%ebx\n"                            \
-                       "mov  %%ebx,32(%%eax)\n"                       \
-                       "mov  %%fs,%%ebx\n"                            \
-                       "mov  %%ebx,36(%%eax)\n"                       \
-                       "mov  %%gs,%%ebx\n"                            \
-                       "mov  %%ebx, 40(%%eax)\n"                      \
-                       "call 0f\n"                                    \
-                     "0:pop %%ebx\n"                                  \
-                       "add  $1f-0b,%%ebx\n"                          \
-                       "mov  %%ebx,48(%%eax)\n"                       \
-                       "mov  %%cs,%%ebx\n"                            \
-                       "mov  %%ebx,52(%%eax)\n"                       \
-                       "pushf\n"                                      \
-                       "pop  %%ebx\n"                                 \
-                       "mov  %%ebx,56(%%eax)\n"                       \
-                       "mov  %%esp,%%ebx\n"                           \
-                       "add  $8,%%ebx\n"                              \
-                       "mov  %%ebx,60(%%eax)\n"                       \
-                       "mov  %%ss,%%ebx\n"                            \
-                       "mov  %%ebx,64(%%eax)\n"                       \
-                       "pop  %%ebx\n"                                 \
-                       "pop  %%ebp\n"                                 \
-                     "1:"                                             \
-                       : : "a" (&f) : "memory");                      \
-                     } while (0)
-  #define SET_FRAME(f,r)                                              \
-                     do {                                             \
-                       errno = (f).errno_;                            \
-                       (r)   = (f).uregs;                             \
-                     } while (0)
-#elif defined(__x86_64__) && defined(__GNUC__)
-  /* The FRAME and SET_FRAME macros for x86_64.  */
-  typedef struct Frame {
-    struct i386_regs uregs;
-    int              errno_;
-    pid_t            tid;
-  } Frame;
-  #define FRAME(f) Frame f;                                           \
-                   do {                                               \
-                     f.errno_ = errno;                                \
-                     f.tid    = sys_gettid();                         \
-                     __asm__ volatile (                               \
-                       "push %%rbp\n"                                 \
-                       "push %%rbx\n"                                 \
-                       "mov  %%r15,0(%%rax)\n"                        \
-                       "mov  %%r14,8(%%rax)\n"                        \
-                       "mov  %%r13,16(%%rax)\n"                       \
-                       "mov  %%r12,24(%%rax)\n"                       \
-                       "mov  %%rbp,32(%%rax)\n"                       \
-                       "mov  %%rbx,40(%%rax)\n"                       \
-                       "mov  %%r11,48(%%rax)\n"                       \
-                       "mov  %%r10,56(%%rax)\n"                       \
-                       "mov  %%r9,64(%%rax)\n"                        \
-                       "mov  %%r8,72(%%rax)\n"                        \
-                       "mov  %%rax,80(%%rax)\n"                       \
-                       "mov  %%rcx,88(%%rax)\n"                       \
-                       "mov  %%rdx,96(%%rax)\n"                       \
-                       "mov  %%rsi,104(%%rax)\n"                      \
-                       "mov  %%rdi,112(%%rax)\n"                      \
-                       "mov  %%ds,%%rbx\n"                            \
-                       "mov  %%rbx,184(%%rax)\n"                      \
-                       "mov  %%es,%%rbx\n"                            \
-                       "mov  %%rbx,192(%%rax)\n"                      \
-                       "mov  %%fs,%%rbx\n"                            \
-                       "mov  %%rbx,200(%%rax)\n"                      \
-                       "mov  %%gs,%%rbx\n"                            \
-                       "mov  %%rbx,208(%%rax)\n"                      \
-                       "call 0f\n"                                    \
-                     "0:pop %%rbx\n"                                  \
-                       "add  $1f-0b,%%rbx\n"                          \
-                       "mov  %%rbx,128(%%rax)\n"                      \
-                       "mov  %%cs,%%rbx\n"                            \
-                       "mov  %%rbx,136(%%rax)\n"                      \
-                       "pushf\n"                                      \
-                       "pop  %%rbx\n"                                 \
-                       "mov  %%rbx,144(%%rax)\n"                      \
-                       "mov  %%rsp,%%rbx\n"                           \
-                       "add  $16,%%ebx\n"                             \
-                       "mov  %%rbx,152(%%rax)\n"                      \
-                       "mov  %%ss,%%rbx\n"                            \
-                       "mov  %%rbx,160(%%rax)\n"                      \
-                       "pop  %%rbx\n"                                 \
-                       "pop  %%rbp\n"                                 \
-                     "1:"                                             \
-                       : : "a" (&f) : "memory");                      \
-                     } while (0)
-  #define SET_FRAME(f,r)                                              \
-                     do {                                             \
-                       errno = (f).errno_;                            \
-                       (f).uregs.fs_base = (r).fs_base;               \
-                       (f).uregs.gs_base = (r).gs_base;               \
-                       (r)   = (f).uregs;                             \
-                     } while (0)
-#elif defined(__arm__) && defined(__GNUC__)
-  /* ARM calling conventions are a little more tricky. A little assembly
-   * helps in obtaining an accurate snapshot of all registers.
-   */
-  typedef struct Frame {
-    struct arm_regs arm;
-    int             errno_;
-    pid_t           tid;
-  } Frame;
-  #define FRAME(f) Frame f;                                           \
-                   do {                                               \
-                     long cpsr;                                       \
-                     f.errno_ = errno;                                \
-                     f.tid    = sys_gettid();                         \
-                     __asm__ volatile(                                \
-                       "stmia %0, {r0-r15}\n" /* All integer regs   */\
-                       : : "r"(&f.arm) : "memory");                   \
-                     f.arm.uregs[16] = 0;                             \
-                     __asm__ volatile(                                \
-                       "mrs %0, cpsr\n"       /* Condition code reg */\
-                       : "=r"(cpsr));                                 \
-                     f.arm.uregs[17] = cpsr;                          \
-                   } while (0)
-  #define SET_FRAME(f,r)                                              \
-                     do {                                             \
-                       /* Don't override the FPU status register.   */\
-                       /* Use the value obtained from ptrace(). This*/\
-                       /* works, because our code does not perform  */\
-                       /* any FPU operations, itself.               */\
-                       long fps      = (f).arm.uregs[16];             \
-                       errno         = (f).errno_;                    \
-                       (r)           = (f).arm;                       \
-                       (r).uregs[16] = fps;                           \
-                     } while (0)
-#elif defined(__mips__) && defined(__GNUC__)
-  typedef struct Frame {
-    struct mips_regs mips_regs;
-    int              errno_;
-    pid_t            tid;
-  } Frame;
-  #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; })
-  #define FRAME(f) Frame f = { 0 };                                   \
-                   do {                                               \
-                     unsigned long hi, lo;                            \
-                     register unsigned long pc __asm__("$31");        \
-                     f.mips_regs.uregs[ 0] = MIPSREG( 0);             \
-                     f.mips_regs.uregs[ 1] = MIPSREG( 1);             \
-                     f.mips_regs.uregs[ 2] = MIPSREG( 2);             \
-                     f.mips_regs.uregs[ 3] = MIPSREG( 3);             \
-                     f.mips_regs.uregs[ 4] = MIPSREG( 4);             \
-                     f.mips_regs.uregs[ 5] = MIPSREG( 5);             \
-                     f.mips_regs.uregs[ 6] = MIPSREG( 6);             \
-                     f.mips_regs.uregs[ 7] = MIPSREG( 7);             \
-                     f.mips_regs.uregs[ 8] = MIPSREG( 8);             \
-                     f.mips_regs.uregs[ 9] = MIPSREG( 9);             \
-                     f.mips_regs.uregs[10] = MIPSREG(10);             \
-                     f.mips_regs.uregs[11] = MIPSREG(11);             \
-                     f.mips_regs.uregs[12] = MIPSREG(12);             \
-                     f.mips_regs.uregs[13] = MIPSREG(13);             \
-                     f.mips_regs.uregs[14] = MIPSREG(14);             \
-                     f.mips_regs.uregs[15] = MIPSREG(15);             \
-                     f.mips_regs.uregs[16] = MIPSREG(16);             \
-                     f.mips_regs.uregs[17] = MIPSREG(17);             \
-                     f.mips_regs.uregs[18] = MIPSREG(18);             \
-                     f.mips_regs.uregs[19] = MIPSREG(19);             \
-                     f.mips_regs.uregs[20] = MIPSREG(20);             \
-                     f.mips_regs.uregs[21] = MIPSREG(21);             \
-                     f.mips_regs.uregs[22] = MIPSREG(22);             \
-                     f.mips_regs.uregs[23] = MIPSREG(23);             \
-                     f.mips_regs.uregs[24] = MIPSREG(24);             \
-                     f.mips_regs.uregs[25] = MIPSREG(25);             \
-                     f.mips_regs.uregs[26] = MIPSREG(26);             \
-                     f.mips_regs.uregs[27] = MIPSREG(27);             \
-                     f.mips_regs.uregs[28] = MIPSREG(28);             \
-                     f.mips_regs.uregs[29] = MIPSREG(29);             \
-                     f.mips_regs.uregs[30] = MIPSREG(30);             \
-                     f.mips_regs.uregs[31] = MIPSREG(31);             \
-                     __asm__ volatile ("mfhi %0" : "=r"(hi));         \
-                     __asm__ volatile ("mflo %0" : "=r"(lo));         \
-                     __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc));   \
-                     f.mips_regs.hi       = hi;                       \
-                     f.mips_regs.lo       = lo;                       \
-                     f.mips_regs.cp0_epc  = pc;                       \
-                     f.errno_             = errno;                    \
-                     f.tid                = sys_gettid();             \
-                   } while (0)
-  #define SET_FRAME(f,r)                                              \
-                   do {                                               \
-                     errno       = (f).errno_;                        \
-                     memcpy((r).uregs, (f).mips_regs.uregs,           \
-                            32*sizeof(unsigned long));                \
-                     (r).hi      = (f).mips_regs.hi;                  \
-                     (r).lo      = (f).mips_regs.lo;                  \
-                     (r).cp0_epc = (f).mips_regs.cp0_epc;             \
-                   } while (0)
-#else
-  /* If we do not have a hand-optimized assembly version of the FRAME()
-   * macro, we cannot reliably unroll the stack. So, we show a few additional
-   * stack frames for the coredumper.
-   */
-  typedef struct Frame {
-    pid_t tid;
-  } Frame;
-  #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0)
-  #define SET_FRAME(f,r) do { } while (0)
-#endif
-
-
-/* Internal function for generating a core file. This API can change without
- * notice and is only supposed to be used internally by the core dumper.
- *
- * This function works for both single- and multi-threaded core
- * dumps. If called as
- *
- *   FRAME(frame);
- *   InternalGetCoreDump(&frame, 0, NULL, ap);
- *
- * it creates a core file that only contains information about the
- * calling thread.
- *
- * Optionally, the caller can provide information about other threads
- * by passing their process ids in "thread_pids". The process id of
- * the caller should not be included in this array. All of the threads
- * must have been attached to with ptrace(), prior to calling this
- * function. They will be detached when "InternalGetCoreDump()" returns.
- *
- * This function either returns a file handle that can be read for obtaining
- * a core dump, or "-1" in case of an error. In the latter case, "errno"
- * will be set appropriately.
- *
- * While "InternalGetCoreDump()" is not technically async signal safe, you
- * might be tempted to invoke it from a signal handler. The code goes to
- * great lengths to make a best effort that this will actually work. But in
- * any case, you must make sure that you preserve the value of "errno"
- * yourself. It is guaranteed to be clobbered otherwise.
- *
- * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again,
- * it makes a best effort to behave reasonably when called in a multi-
- * threaded environment, but it is ultimately the caller's responsibility
- * to provide locking.
- */
-int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids,
-                        va_list ap
-                     /* const struct CoreDumpParameters *params,
-                        const char *file_name,
-                        const char *PATH
-                      */);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* _ELFCORE_H */
diff --git a/contrib/libtcmalloc/src/base/googleinit.h b/contrib/libtcmalloc/src/base/googleinit.h
deleted file mode 100644
index 3ea411a325a..00000000000
--- a/contrib/libtcmalloc/src/base/googleinit.h
+++ /dev/null
@@ -1,74 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Jacob Hoffman-Andrews
-
-#ifndef _GOOGLEINIT_H
-#define _GOOGLEINIT_H
-
-#include "base/logging.h"
-
-class GoogleInitializer {
- public:
-  typedef void (*VoidFunction)(void);
-  GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor)
-      : name_(name), destructor_(dtor) {
-    RAW_VLOG(10, "<GoogleModuleObject> constructing: %s\n", name_);
-    if (ctor)
-      ctor();
-  }
-  ~GoogleInitializer() {
-    RAW_VLOG(10, "<GoogleModuleObject> destroying: %s\n", name_);
-    if (destructor_)
-      destructor_();
-  }
-
- private:
-  const char* const name_;
-  const VoidFunction destructor_;
-};
-
-#define REGISTER_MODULE_INITIALIZER(name, body)                 \
-  namespace {                                                   \
-    static void google_init_module_##name () { body; }          \
-    GoogleInitializer google_initializer_module_##name(#name,   \
-            google_init_module_##name, NULL);                   \
-  }
-
-#define REGISTER_MODULE_DESTRUCTOR(name, body)                  \
-  namespace {                                                   \
-    static void google_destruct_module_##name () { body; }      \
-    GoogleInitializer google_destructor_module_##name(#name,    \
-            NULL, google_destruct_module_##name);               \
-  }
-
-
-#endif /* _GOOGLEINIT_H */
diff --git a/contrib/libtcmalloc/src/base/linux_syscall_support.h b/contrib/libtcmalloc/src/base/linux_syscall_support.h
deleted file mode 100644
index 6a94dc3fc72..00000000000
--- a/contrib/libtcmalloc/src/base/linux_syscall_support.h
+++ /dev/null
@@ -1,2880 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005-2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-/* This file includes Linux-specific support functions common to the
- * coredumper and the thread lister; primarily, this is a collection
- * of direct system calls, and a couple of symbols missing from
- * standard header files.
- * There are a few options that the including file can set to control
- * the behavior of this file:
- *
- * SYS_CPLUSPLUS:
- *   The entire header file will normally be wrapped in 'extern "C" { }",
- *   making it suitable for compilation as both C and C++ source. If you
- *   do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
- *   the wrapping. N.B. doing so will suppress inclusion of all prerequisite
- *   system header files, too. It is the caller's responsibility to provide
- *   the necessary definitions.
- *
- * SYS_ERRNO:
- *   All system calls will update "errno" unless overriden by setting the
- *   SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
- *   an l-value.
- *
- * SYS_INLINE:
- *   New symbols will be defined "static inline", unless overridden by
- *   the SYS_INLINE macro.
- *
- * SYS_LINUX_SYSCALL_SUPPORT_H
- *   This macro is used to avoid multiple inclusions of this header file.
- *   If you need to include this file more than once, make sure to
- *   unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
- *
- * SYS_PREFIX:
- *   New system calls will have a prefix of "sys_" unless overridden by
- *   the SYS_PREFIX macro. Valid values for this macro are [0..9] which
- *   results in prefixes "sys[0..9]_". It is also possible to set this
- *   macro to -1, which avoids all prefixes.
- *
- * This file defines a few internal symbols that all start with "LSS_".
- * Do not access these symbols from outside this file. They are not part
- * of the supported API.
- *
- * NOTE: This is a stripped down version of the official opensource
- * version of linux_syscall_support.h, which lives at
- *    http://code.google.com/p/linux-syscall-support/
- * It includes only the syscalls that are used in perftools, plus a
- * few extra.  Here's the breakdown:
- * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
- *      sys__exit(
- *      sys_clone(
- *      sys_close(
- *      sys_fcntl(
- *      sys_fstat(
- *      sys_futex(
- *      sys_getcpu(
- *      sys_getdents64(
- *      sys_getppid(
- *      sys_gettid(
- *      sys_lseek(
- *      sys_mmap(
- *      sys_mremap(
- *      sys_munmap(
- *      sys_open(
- *      sys_pipe(
- *      sys_prctl(
- *      sys_ptrace(
- *      sys_ptrace_detach(
- *      sys_read(
- *      sys_sched_yield(
- *      sys_sigaction(
- *      sys_sigaltstack(
- *      sys_sigdelset(
- *      sys_sigfillset(
- *      sys_sigprocmask(
- *      sys_socket(
- *      sys_stat(
- *      sys_waitpid(
- * 2) These are used as subroutines of the above:
- *      sys_getpid       -- gettid
- *      sys_kill         -- ptrace_detach
- *      sys_restore      -- sigaction
- *      sys_restore_rt   -- sigaction
- *      sys_socketcall   -- socket
- *      sys_wait4        -- waitpid
- * 3) I left these in even though they're not used.  They either
- * complement the above (write vs read) or are variants (rt_sigaction):
- *      sys_fstat64
- *      sys_llseek
- *      sys_mmap2
- *      sys_openat
- *      sys_getdents
- *      sys_rt_sigaction
- *      sys_rt_sigprocmask
- *      sys_sigaddset
- *      sys_sigemptyset
- *      sys_stat64
- *      sys_write
- */
-#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
-#define SYS_LINUX_SYSCALL_SUPPORT_H
-
-/* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64, Aarch64, s390 and s390x
- * on Linux.
- * Porting to other related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-     defined(__mips__) || defined(__PPC__) || \
-     defined(__aarch64__) || defined(__s390__)) \
-  && (defined(__linux))
-
-#ifndef SYS_CPLUSPLUS
-#ifdef __cplusplus
-/* Some system header files in older versions of gcc neglect to properly
- * handle being included from C++. As it appears to be harmless to have
- * multiple nested 'extern "C"' blocks, just add another one here.
- */
-extern "C" {
-#endif
-
-#include <errno.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <syscall.h>
-#include <unistd.h>
-#include <linux/unistd.h>
-#include <endian.h>
-
-#ifdef __mips__
-/* Include definitions of the ABI currently in use.                          */
-#include <sgidefs.h>
-#endif
-
-#endif
-
-/* As glibc often provides subtly incompatible data structures (and implicit
- * wrapper functions that convert them), we provide our own kernel data
- * structures for use by the system calls.
- * These structures have been developed by using Linux 2.6.23 headers for
- * reference. Note though, we do not care about exact API compatibility
- * with the kernel, and in fact the kernel often does not have a single
- * API that works across architectures. Instead, we try to mimic the glibc
- * API where reasonable, and only guarantee ABI compatibility with the
- * kernel headers.
- * Most notably, here are a few changes that were made to the structures
- * defined by kernel headers:
- *
- * - we only define structures, but not symbolic names for kernel data
- *   types. For the latter, we directly use the native C datatype
- *   (i.e. "unsigned" instead of "mode_t").
- * - in a few cases, it is possible to define identical structures for
- *   both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
- *   standardizing on the 64bit version of the data types. In particular,
- *   this means that we use "unsigned" where the 32bit headers say
- *   "unsigned long".
- * - overall, we try to minimize the number of cases where we need to
- *   conditionally define different structures.
- * - the "struct kernel_sigaction" class of structures have been
- *   modified to more closely mimic glibc's API by introducing an
- *   anonymous union for the function pointer.
- * - a small number of field names had to have an underscore appended to
- *   them, because glibc defines a global macro by the same name.
- */
-
-/* include/linux/dirent.h                                                    */
-struct kernel_dirent64 {
-  unsigned long long d_ino;
-  long long          d_off;
-  unsigned short     d_reclen;
-  unsigned char      d_type;
-  char               d_name[256];
-};
-
-/* include/linux/dirent.h                                                    */
-struct kernel_dirent {
-  long               d_ino;
-  long               d_off;
-  unsigned short     d_reclen;
-  char               d_name[256];
-};
-
-/* include/linux/time.h                                                      */
-struct kernel_timespec {
-  long               tv_sec;
-  long               tv_nsec;
-};
-
-/* include/linux/time.h                                                      */
-struct kernel_timeval {
-  long               tv_sec;
-  long               tv_usec;
-};
-
-/* include/linux/resource.h                                                  */
-struct kernel_rusage {
-  struct kernel_timeval ru_utime;
-  struct kernel_timeval ru_stime;
-  long               ru_maxrss;
-  long               ru_ixrss;
-  long               ru_idrss;
-  long               ru_isrss;
-  long               ru_minflt;
-  long               ru_majflt;
-  long               ru_nswap;
-  long               ru_inblock;
-  long               ru_oublock;
-  long               ru_msgsnd;
-  long               ru_msgrcv;
-  long               ru_nsignals;
-  long               ru_nvcsw;
-  long               ru_nivcsw;
-};
-
-#if defined(__i386__) || defined(__arm__) \
-  || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__))
-
-/* include/asm-{arm,i386,mips,ppc}/signal.h                                  */
-struct kernel_old_sigaction {
-  union {
-    void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, siginfo_t *, void *);
-  };
-  unsigned long      sa_mask;
-  unsigned long      sa_flags;
-  void               (*sa_restorer)(void);
-} __attribute__((packed,aligned(4)));
-#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
-  #define kernel_old_sigaction kernel_sigaction
-#elif defined(__aarch64__)
-  // No kernel_old_sigaction defined for arm64.
-#endif
-
-/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
- * exactly match the size of the signal set, even though the API was
- * intended to be extensible. We define our own KERNEL_NSIG to deal with
- * this.
- * Please note that glibc provides signals [1.._NSIG-1], whereas the
- * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
- * actual number of signals is obviously the same, but the constants
- * differ by one.
- */
-#ifdef __mips__
-#define KERNEL_NSIG 128
-#else
-#define KERNEL_NSIG  64
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64}/signal.h                               */
-struct kernel_sigset_t {
-  unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
-                    (8*sizeof(unsigned long))];
-};
-
-/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h                   */
-struct kernel_sigaction {
-#ifdef __mips__
-  unsigned long      sa_flags;
-  union {
-    void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, siginfo_t *, void *);
-  };
-  struct kernel_sigset_t sa_mask;
-#else
-  union {
-    void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, siginfo_t *, void *);
-  };
-  unsigned long      sa_flags;
-  void               (*sa_restorer)(void);
-  struct kernel_sigset_t sa_mask;
-#endif
-};
-
-/* include/asm-{arm,i386,mips,ppc,s390}/stat.h                               */
-#ifdef __mips__
-#if _MIPS_SIM == _MIPS_SIM_ABI64
-struct kernel_stat {
-#else
-struct kernel_stat64 {
-#endif
-  unsigned           st_dev;
-  unsigned           __pad0[3];
-  unsigned long long st_ino;
-  unsigned           st_mode;
-  unsigned           st_nlink;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  unsigned           st_rdev;
-  unsigned           __pad1[3];
-  long long          st_size;
-  unsigned           st_atime_;
-  unsigned           st_atime_nsec_;
-  unsigned           st_mtime_;
-  unsigned           st_mtime_nsec_;
-  unsigned           st_ctime_;
-  unsigned           st_ctime_nsec_;
-  unsigned           st_blksize;
-  unsigned           __pad2;
-  unsigned long long st_blocks;
-};
-#elif defined __PPC__
-struct kernel_stat64 {
-  unsigned long long st_dev;
-  unsigned long long st_ino;
-  unsigned           st_nlink;
-  unsigned           st_mode;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  int                __pad2;
-  unsigned long long st_rdev;
-  long long          st_size;
-  long long          st_blksize;
-  long long          st_blocks;
-  kernel_timespec    st_atim;
-  kernel_timespec    st_mtim;
-  kernel_timespec    st_ctim;
-  unsigned long      __unused4;
-  unsigned long      __unused5;
-  unsigned long      __unused6;
-};
-#else
-struct kernel_stat64 {
-  unsigned long long st_dev;
-  unsigned char      __pad0[4];
-  unsigned           __st_ino;
-  unsigned           st_mode;
-  unsigned           st_nlink;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  unsigned long long st_rdev;
-  unsigned char      __pad3[4];
-  long long          st_size;
-  unsigned           st_blksize;
-  unsigned long long st_blocks;
-  unsigned           st_atime_;
-  unsigned           st_atime_nsec_;
-  unsigned           st_mtime_;
-  unsigned           st_mtime_nsec_;
-  unsigned           st_ctime_;
-  unsigned           st_ctime_nsec_;
-  unsigned long long st_ino;
-};
-#endif
-
-/* include/asm-{arm,generic,i386,mips,x86_64,ppc,s390}/stat.h                     */
-#if defined(__i386__) || defined(__arm__)
-struct kernel_stat {
-  /* The kernel headers suggest that st_dev and st_rdev should be 32bit
-   * quantities encoding 12bit major and 20bit minor numbers in an interleaved
-   * format. In reality, we do not see useful data in the top bits. So,
-   * we'll leave the padding in here, until we find a better solution.
-   */
-  unsigned short     st_dev;
-  short              pad1;
-  unsigned           st_ino;
-  unsigned short     st_mode;
-  unsigned short     st_nlink;
-  unsigned short     st_uid;
-  unsigned short     st_gid;
-  unsigned short     st_rdev;
-  short              pad2;
-  unsigned           st_size;
-  unsigned           st_blksize;
-  unsigned           st_blocks;
-  unsigned           st_atime_;
-  unsigned           st_atime_nsec_;
-  unsigned           st_mtime_;
-  unsigned           st_mtime_nsec_;
-  unsigned           st_ctime_;
-  unsigned           st_ctime_nsec_;
-  unsigned           __unused4;
-  unsigned           __unused5;
-};
-#elif defined(__x86_64__)
-struct kernel_stat {
-  uint64_t           st_dev;
-  uint64_t           st_ino;
-  uint64_t           st_nlink;
-  unsigned           st_mode;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  unsigned           __pad0;
-  uint64_t           st_rdev;
-  int64_t            st_size;
-  int64_t            st_blksize;
-  int64_t            st_blocks;
-  uint64_t           st_atime_;
-  uint64_t           st_atime_nsec_;
-  uint64_t           st_mtime_;
-  uint64_t           st_mtime_nsec_;
-  uint64_t           st_ctime_;
-  uint64_t           st_ctime_nsec_;
-  int64_t            __unused[3];
-};
-#elif defined(__PPC__)
-struct kernel_stat {
-  unsigned long long st_dev;
-  unsigned long      st_ino;
-  unsigned long      st_nlink;
-  unsigned long      st_mode;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  int                __pad2;
-  unsigned long long st_rdev;
-  long               st_size;
-  unsigned long      st_blksize;
-  unsigned long      st_blocks;
-  kernel_timespec    st_atim;
-  kernel_timespec    st_mtim;
-  kernel_timespec    st_ctim;
-  unsigned long      __unused4;
-  unsigned long      __unused5;
-  unsigned long      __unused6;
-};
-#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
-struct kernel_stat {
-  unsigned           st_dev;
-  int                st_pad1[3];
-  unsigned           st_ino;
-  unsigned           st_mode;
-  unsigned           st_nlink;
-  unsigned           st_uid;
-  unsigned           st_gid;
-  unsigned           st_rdev;
-  int                st_pad2[2];
-  long               st_size;
-  int                st_pad3;
-  long               st_atime_;
-  long               st_atime_nsec_;
-  long               st_mtime_;
-  long               st_mtime_nsec_;
-  long               st_ctime_;
-  long               st_ctime_nsec_;
-  int                st_blksize;
-  int                st_blocks;
-  int                st_pad4[14];
-};
-#elif defined(__aarch64__)
-struct kernel_stat {
-  unsigned long      st_dev;
-  unsigned long      st_ino;
-  unsigned int       st_mode;
-  unsigned int       st_nlink;
-  unsigned int       st_uid;
-  unsigned int       st_gid;
-  unsigned long      st_rdev;
-  unsigned long      __pad1;
-  long               st_size;
-  int                st_blksize;
-  int                __pad2;
-  long               st_blocks;
-  long               st_atime_;
-  unsigned long      st_atime_nsec_;
-  long               st_mtime_;
-  unsigned long      st_mtime_nsec_;
-  long               st_ctime_;
-  unsigned long      st_ctime_nsec_;
-  unsigned int       __unused4;
-  unsigned int       __unused5;
-};
-#elif defined(__s390x__)
-struct kernel_stat {
-  unsigned long      st_dev;
-  unsigned long      st_ino;
-  unsigned long      st_nlink;
-  unsigned int       st_mode;
-  unsigned int       st_uid;
-  unsigned int       st_gid;
-  unsigned int       __pad1;
-  unsigned long      st_rdev;
-  unsigned long      st_size;
-  unsigned long      st_atime_;
-  unsigned long      st_atime_nsec_;
-  unsigned long      st_mtime_;
-  unsigned long      st_mtime_nsec_;
-  unsigned long      st_ctime_;
-  unsigned long      st_ctime_nsec_;
-  unsigned long      st_blksize;
-  long               st_blocks;
-  unsigned long      __unused[3];
-};
-#elif defined(__s390__)
-struct kernel_stat {
-  unsigned short     st_dev;
-  unsigned short     __pad1;
-  unsigned long      st_ino;
-  unsigned short     st_mode;
-  unsigned short     st_nlink;
-  unsigned short     st_uid;
-  unsigned short     st_gid;
-  unsigned short     st_rdev;
-  unsigned short     __pad2;
-  unsigned long      st_size;
-  unsigned long      st_blksize;
-  unsigned long      st_blocks;
-  unsigned long      st_atime_;
-  unsigned long      st_atime_nsec_;
-  unsigned long      st_mtime_;
-  unsigned long      st_mtime_nsec_;
-  unsigned long      st_ctime_;
-  unsigned long      st_ctime_nsec_;
-  unsigned long      __unused4;
-  unsigned long      __unused5;
-};
-#endif
-
-
-/* Definitions missing from the standard header files                        */
-#ifndef O_DIRECTORY
-#if defined(__arm__)
-#define O_DIRECTORY             0040000
-#else
-#define O_DIRECTORY             0200000
-#endif
-#endif
-#ifndef PR_GET_DUMPABLE
-#define PR_GET_DUMPABLE         3
-#endif
-#ifndef PR_SET_DUMPABLE
-#define PR_SET_DUMPABLE         4
-#endif
-#ifndef AT_FDCWD
-#define AT_FDCWD                (-100)
-#endif
-#ifndef AT_SYMLINK_NOFOLLOW
-#define AT_SYMLINK_NOFOLLOW     0x100
-#endif
-#ifndef AT_REMOVEDIR
-#define AT_REMOVEDIR            0x200
-#endif
-#ifndef MREMAP_FIXED
-#define MREMAP_FIXED            2
-#endif
-#ifndef SA_RESTORER
-#define SA_RESTORER             0x04000000
-#endif
-
-#if defined(__i386__)
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction       174
-#define __NR_rt_sigprocmask     175
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64             195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64            197
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         220
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid             224
-#endif
-#ifndef __NR_futex
-#define __NR_futex              240
-#endif
-#ifndef __NR_openat
-#define __NR_openat             295
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             318
-#endif
-/* End of i386 definitions                                                   */
-#elif defined(__arm__)
-#ifndef __syscall
-#if defined(__thumb__) || defined(__ARM_EABI__)
-#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
-#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
-#define __syscall(name) "swi\t0"
-#define __syscall_safe(name)                     \
-  "push  {r7}\n"                                 \
-  "mov   r7,%[sysreg]\n"                         \
-  __syscall(name)"\n"                            \
-  "pop   {r7}"
-#else
-#define __SYS_REG(name)
-#define __SYS_REG_LIST(regs...) regs
-#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
-#define __syscall_safe(name) __syscall(name)
-#endif
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction       (__NR_SYSCALL_BASE + 174)
-#define __NR_rt_sigprocmask     (__NR_SYSCALL_BASE + 175)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64             (__NR_SYSCALL_BASE + 195)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64            (__NR_SYSCALL_BASE + 197)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         (__NR_SYSCALL_BASE + 217)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid             (__NR_SYSCALL_BASE + 224)
-#endif
-#ifndef __NR_futex
-#define __NR_futex              (__NR_SYSCALL_BASE + 240)
-#endif
-/* End of ARM definitions                                                  */
-#elif defined(__x86_64__)
-#ifndef __NR_gettid
-#define __NR_gettid             186
-#endif
-#ifndef __NR_futex
-#define __NR_futex              202
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         217
-#endif
-#ifndef __NR_openat
-#define __NR_openat             257
-#endif
-/* End of x86-64 definitions                                                 */
-#elif defined(__mips__)
-#if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction       (__NR_Linux + 194)
-#define __NR_rt_sigprocmask     (__NR_Linux + 195)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64             (__NR_Linux + 213)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64            (__NR_Linux + 215)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         (__NR_Linux + 219)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid             (__NR_Linux + 222)
-#endif
-#ifndef __NR_futex
-#define __NR_futex              (__NR_Linux + 238)
-#endif
-#ifndef __NR_openat
-#define __NR_openat             (__NR_Linux + 288)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat            (__NR_Linux + 293)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             (__NR_Linux + 312)
-#endif
-/* End of MIPS (old 32bit API) definitions */
-#elif  _MIPS_SIM == _MIPS_SIM_ABI64
-#ifndef __NR_gettid
-#define __NR_gettid             (__NR_Linux + 178)
-#endif
-#ifndef __NR_futex
-#define __NR_futex              (__NR_Linux + 194)
-#endif
-#ifndef __NR_openat
-#define __NR_openat             (__NR_Linux + 247)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat            (__NR_Linux + 252)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             (__NR_Linux + 271)
-#endif
-/* End of MIPS (64bit API) definitions */
-#else
-#ifndef __NR_gettid
-#define __NR_gettid             (__NR_Linux + 178)
-#endif
-#ifndef __NR_futex
-#define __NR_futex              (__NR_Linux + 194)
-#endif
-#ifndef __NR_openat
-#define __NR_openat             (__NR_Linux + 251)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat            (__NR_Linux + 256)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             (__NR_Linux + 275)
-#endif
-/* End of MIPS (new 32bit API) definitions                                   */
-#endif
-/* End of MIPS definitions                                                   */
-#elif defined(__PPC__)
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction       173
-#define __NR_rt_sigprocmask     174
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64             195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64            197
-#endif
-#ifndef __NR_socket
-#define __NR_socket             198
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         202
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid             207
-#endif
-#ifndef __NR_futex
-#define __NR_futex              221
-#endif
-#ifndef __NR_openat
-#define __NR_openat             286
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             302
-#endif
-/* End of powerpc defininitions                                              */
-#elif defined(__aarch64__)
-#ifndef __NR_fstatat
-#define __NR_fstatat             79
-#endif
-/* End of aarch64 defininitions                                              */
-#elif defined(__s390__)
-#ifndef __NR_quotactl
-#define __NR_quotactl           131
-#endif
-#ifndef __NR_rt_sigreturn
-#define __NR_rt_sigreturn       173
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction       174
-#endif
-#ifndef __NR_rt_sigprocmask
-#define __NR_rt_sigprocmask     175
-#endif
-#ifndef __NR_rt_sigpending
-#define __NR_rt_sigpending      176
-#endif
-#ifndef __NR_rt_sigsuspend
-#define __NR_rt_sigsuspend      179
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64            180
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           181
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64         220
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead          222
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           224
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          225
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           227
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          228
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          230
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         231
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid             236
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill              237
-#endif
-#ifndef __NR_futex
-#define __NR_futex              238
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  239
-#endif
-#ifndef __NR_sched_getaffinity
-#define __NR_sched_getaffinity  240
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    252
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      260
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       261
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           265
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          266
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         282
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         283
-#endif
-#ifndef __NR_openat
-#define __NR_openat             288
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           294
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         310
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             311
-#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate          314
-#endif
-/* Some syscalls are named/numbered differently between s390 and s390x. */
-#ifdef __s390x__
-# ifndef __NR_getrlimit
-# define __NR_getrlimit          191
-# endif
-# ifndef __NR_setresuid
-# define __NR_setresuid          208
-# endif
-# ifndef __NR_getresuid
-# define __NR_getresuid          209
-# endif
-# ifndef __NR_setresgid
-# define __NR_setresgid          210
-# endif
-# ifndef __NR_getresgid
-# define __NR_getresgid          211
-# endif
-# ifndef __NR_setfsuid
-# define __NR_setfsuid           215
-# endif
-# ifndef __NR_setfsgid
-# define __NR_setfsgid           216
-# endif
-# ifndef __NR_fadvise64
-# define __NR_fadvise64          253
-# endif
-# ifndef __NR_newfstatat
-# define __NR_newfstatat         293
-# endif
-#else /* __s390x__ */
-# ifndef __NR_getrlimit
-# define __NR_getrlimit          76
-# endif
-# ifndef __NR_setfsuid
-# define __NR_setfsuid           138
-# endif
-# ifndef __NR_setfsgid
-# define __NR_setfsgid           139
-# endif
-# ifndef __NR_setresuid
-# define __NR_setresuid          164
-# endif
-# ifndef __NR_getresuid
-# define __NR_getresuid          165
-# endif
-# ifndef __NR_setresgid
-# define __NR_setresgid          170
-# endif
-# ifndef __NR_getresgid
-# define __NR_getresgid          171
-# endif
-# ifndef __NR_ugetrlimit
-# define __NR_ugetrlimit         191
-# endif
-# ifndef __NR_mmap2
-# define __NR_mmap2              192
-# endif
-# ifndef __NR_setresuid32
-# define __NR_setresuid32        208
-# endif
-# ifndef __NR_getresuid32
-# define __NR_getresuid32        209
-# endif
-# ifndef __NR_setresgid32
-# define __NR_setresgid32        210
-# endif
-# ifndef __NR_getresgid32
-# define __NR_getresgid32        211
-# endif
-# ifndef __NR_setfsuid32
-# define __NR_setfsuid32         215
-# endif
-# ifndef __NR_setfsgid32
-# define __NR_setfsgid32         216
-# endif
-# ifndef __NR_fadvise64_64
-# define __NR_fadvise64_64       264
-# endif
-# ifndef __NR_fstatat64
-# define __NR_fstatat64          293
-# endif
-#endif /* __s390__ */
-/* End of s390/s390x definitions                                             */
-#endif
-
-
-/* After forking, we must make sure to only call system calls.               */
-#if __BOUNDED_POINTERS__
-  #error "Need to port invocations of syscalls for bounded ptrs"
-#else
-  /* The core dumper and the thread lister get executed after threads
-   * have been suspended. As a consequence, we cannot call any functions
-   * that acquire locks. Unfortunately, libc wraps most system calls
-   * (e.g. in order to implement pthread_atfork, and to make calls
-   * cancellable), which means we cannot call these functions. Instead,
-   * we have to call syscall() directly.
-   */
-  #undef LSS_ERRNO
-  #ifdef SYS_ERRNO
-    /* Allow the including file to override the location of errno. This can
-     * be useful when using clone() with the CLONE_VM option.
-     */
-    #define LSS_ERRNO SYS_ERRNO
-  #else
-    #define LSS_ERRNO errno
-  #endif
-
-  #undef LSS_INLINE
-  #ifdef SYS_INLINE
-    #define LSS_INLINE SYS_INLINE
-  #else
-    #define LSS_INLINE static inline
-  #endif
-
-  /* Allow the including file to override the prefix used for all new
-   * system calls. By default, it will be set to "sys_".
-   */
-  #undef LSS_NAME
-  #ifndef SYS_PREFIX
-    #define LSS_NAME(name) sys_##name
-  #elif SYS_PREFIX < 0
-    #define LSS_NAME(name) name
-  #elif SYS_PREFIX == 0
-    #define LSS_NAME(name) sys0_##name
-  #elif SYS_PREFIX == 1
-    #define LSS_NAME(name) sys1_##name
-  #elif SYS_PREFIX == 2
-    #define LSS_NAME(name) sys2_##name
-  #elif SYS_PREFIX == 3
-    #define LSS_NAME(name) sys3_##name
-  #elif SYS_PREFIX == 4
-    #define LSS_NAME(name) sys4_##name
-  #elif SYS_PREFIX == 5
-    #define LSS_NAME(name) sys5_##name
-  #elif SYS_PREFIX == 6
-    #define LSS_NAME(name) sys6_##name
-  #elif SYS_PREFIX == 7
-    #define LSS_NAME(name) sys7_##name
-  #elif SYS_PREFIX == 8
-    #define LSS_NAME(name) sys8_##name
-  #elif SYS_PREFIX == 9
-    #define LSS_NAME(name) sys9_##name
-  #endif
-
-  #undef  LSS_RETURN
-  #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) ||        \
-       defined(__aarch64__) || defined(__s390__))
-  /* Failing system calls return a negative result in the range of
-   * -1..-4095. These are "errno" values with the sign inverted.
-   */
-  #define LSS_RETURN(type, res)                                               \
-    do {                                                                      \
-      if ((unsigned long)(res) >= (unsigned long)(-4095)) {                   \
-        LSS_ERRNO = -(res);                                                   \
-        res = -1;                                                             \
-      }                                                                       \
-      return (type) (res);                                                    \
-    } while (0)
-  #elif defined(__mips__)
-  /* On MIPS, failing system calls return -1, and set errno in a
-   * separate CPU register.
-   */
-  #define LSS_RETURN(type, res, err)                                          \
-    do {                                                                      \
-      if (err) {                                                              \
-        LSS_ERRNO = (res);                                                    \
-        res = -1;                                                             \
-      }                                                                       \
-      return (type) (res);                                                    \
-    } while (0)
-  #elif defined(__PPC__)
-  /* On PPC, failing system calls return -1, and set errno in a
-   * separate CPU register. See linux/unistd.h.
-   */
-  #define LSS_RETURN(type, res, err)                                          \
-   do {                                                                       \
-     if (err & 0x10000000 ) {                                                 \
-       LSS_ERRNO = (res);                                                     \
-       res = -1;                                                              \
-     }                                                                        \
-     return (type) (res);                                                     \
-   } while (0)
-  #endif
-  #if defined(__i386__)
-    #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
-      /* This only works for GCC-4.4 and above -- the first version to use
-         .cfi directives for dwarf unwind info.  */
-      #define CFI_ADJUST_CFA_OFFSET(adjust)                                   \
-                  ".cfi_adjust_cfa_offset " #adjust "\n"
-    #else
-      #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
-    #endif
-
-    /* In PIC mode (e.g. when building shared libraries), gcc for i386
-     * reserves ebx. Unfortunately, most distribution ship with implementations
-     * of _syscallX() which clobber ebx.
-     * Also, most definitions of _syscallX() neglect to mark "memory" as being
-     * clobbered. This causes problems with compilers, that do a better job
-     * at optimizing across __asm__ calls.
-     * So, we just have to redefine all of the _syscallX() macros.
-     */
-    #undef  LSS_BODY
-    #define LSS_BODY(type,args...)                                            \
-      long __res;                                                             \
-      __asm__ __volatile__("push %%ebx\n"                                     \
-                           CFI_ADJUST_CFA_OFFSET(4)                           \
-                           "movl %2,%%ebx\n"                                  \
-                           "int $0x80\n"                                      \
-                           "pop %%ebx\n"                                      \
-                           CFI_ADJUST_CFA_OFFSET(-4)                          \
-                           args                                               \
-                           : "esp", "memory");                                \
-      LSS_RETURN(type,__res)
-    #undef  _syscall0
-    #define _syscall0(type,name)                                              \
-      type LSS_NAME(name)(void) {                                             \
-        long __res;                                                           \
-        __asm__ volatile("int $0x80"                                          \
-                         : "=a" (__res)                                       \
-                         : "0" (__NR_##name)                                  \
-                         : "memory");                                         \
-        LSS_RETURN(type,__res);                                               \
-      }
-    #undef  _syscall1
-    #define _syscall1(type,name,type1,arg1)                                   \
-      type LSS_NAME(name)(type1 arg1) {                                       \
-        LSS_BODY(type,                                                        \
-             : "=a" (__res)                                                   \
-             : "0" (__NR_##name), "ri" ((long)(arg1)));                       \
-      }
-    #undef  _syscall2
-    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
-      type LSS_NAME(name)(type1 arg1,type2 arg2) {                            \
-        LSS_BODY(type,                                                        \
-             : "=a" (__res)                                                   \
-             : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2)));    \
-      }
-    #undef  _syscall3
-    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
-      type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) {                 \
-        LSS_BODY(type,                                                        \
-             : "=a" (__res)                                                   \
-             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
-               "d" ((long)(arg3)));                                           \
-      }
-    #undef  _syscall4
-    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_BODY(type,                                                        \
-             : "=a" (__res)                                                   \
-             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
-               "d" ((long)(arg3)),"S" ((long)(arg4)));                        \
-      }
-    #undef  _syscall5
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        long __res;                                                           \
-        __asm__ __volatile__("push %%ebx\n"                                   \
-                             "movl %2,%%ebx\n"                                \
-                             "movl %1,%%eax\n"                                \
-                             "int  $0x80\n"                                   \
-                             "pop  %%ebx"                                     \
-                             : "=a" (__res)                                   \
-                             : "i" (__NR_##name), "ri" ((long)(arg1)),        \
-                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
-                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
-                             : "esp", "memory");                              \
-        LSS_RETURN(type,__res);                                               \
-      }
-    #undef  _syscall6
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5, type6 arg6) {                           \
-        long __res;                                                           \
-        struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 };   \
-        __asm__ __volatile__("push %%ebp\n"                                   \
-                             "push %%ebx\n"                                   \
-                             "movl 4(%2),%%ebp\n"                             \
-                             "movl 0(%2), %%ebx\n"                            \
-                             "movl %1,%%eax\n"                                \
-                             "int  $0x80\n"                                   \
-                             "pop  %%ebx\n"                                   \
-                             "pop  %%ebp"                                     \
-                             : "=a" (__res)                                   \
-                             : "i" (__NR_##name),  "0" ((long)(&__s)),        \
-                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
-                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
-                             : "esp", "memory");                              \
-        LSS_RETURN(type,__res);                                               \
-      }
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      long __res;
-      __asm__ __volatile__(/* if (fn == NULL)
-                            *   return -EINVAL;
-                            */
-                           "movl   %3,%%ecx\n"
-                           "jecxz  1f\n"
-
-                           /* if (child_stack == NULL)
-                            *   return -EINVAL;
-                            */
-                           "movl   %4,%%ecx\n"
-                           "jecxz  1f\n"
-
-                           /* Set up alignment of the child stack:
-                            * child_stack = (child_stack & ~0xF) - 20;
-                            */
-                           "andl   $-16,%%ecx\n"
-                           "subl   $20,%%ecx\n"
-
-                           /* Push "arg" and "fn" onto the stack that will be
-                            * used by the child.
-                            */
-                           "movl   %6,%%eax\n"
-                           "movl   %%eax,4(%%ecx)\n"
-                           "movl   %3,%%eax\n"
-                           "movl   %%eax,(%%ecx)\n"
-
-                           /* %eax = syscall(%eax = __NR_clone,
-                            *                %ebx = flags,
-                            *                %ecx = child_stack,
-                            *                %edx = parent_tidptr,
-                            *                %esi = newtls,
-                            *                %edi = child_tidptr)
-                            * Also, make sure that %ebx gets preserved as it is
-                            * used in PIC mode.
-                            */
-                           "movl   %8,%%esi\n"
-                           "movl   %7,%%edx\n"
-                           "movl   %5,%%eax\n"
-                           "movl   %9,%%edi\n"
-                           "pushl  %%ebx\n"
-                           "movl   %%eax,%%ebx\n"
-                           "movl   %2,%%eax\n"
-                           "int    $0x80\n"
-
-                           /* In the parent: restore %ebx
-                            * In the child:  move "fn" into %ebx
-                            */
-                           "popl   %%ebx\n"
-
-                           /* if (%eax != 0)
-                            *   return %eax;
-                            */
-                           "test   %%eax,%%eax\n"
-                           "jnz    1f\n"
-
-                           /* In the child, now. Terminate frame pointer chain.
-                            */
-                           "movl   $0,%%ebp\n"
-
-                           /* Call "fn". "arg" is already on the stack.
-                            */
-                           "call   *%%ebx\n"
-
-                           /* Call _exit(%ebx). Unfortunately older versions
-                            * of gcc restrict the number of arguments that can
-                            * be passed to asm(). So, we need to hard-code the
-                            * system call number.
-                            */
-                           "movl   %%eax,%%ebx\n"
-                           "movl   $1,%%eax\n"
-                           "int    $0x80\n"
-
-                           /* Return to parent.
-                            */
-                         "1:\n"
-                           : "=a" (__res)
-                           : "0"(-EINVAL), "i"(__NR_clone),
-                             "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
-                             "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
-                           : "esp", "memory", "ecx", "edx", "esi", "edi");
-      LSS_RETURN(int, __res);
-    }
-
-    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
-      /* On i386, the kernel does not know how to return from a signal
-       * handler. Instead, it relies on user space to provide a
-       * restorer function that calls the {rt_,}sigreturn() system call.
-       * Unfortunately, we cannot just reference the glibc version of this
-       * function, as glibc goes out of its way to make it inaccessible.
-       */
-      void (*res)(void);
-      __asm__ __volatile__("call   2f\n"
-                         "0:.align 16\n"
-                         "1:movl   %1,%%eax\n"
-                           "int    $0x80\n"
-                         "2:popl   %0\n"
-                           "addl   $(1b-0b),%0\n"
-                           : "=a" (res)
-                           : "i"  (__NR_rt_sigreturn));
-      return res;
-    }
-    LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
-      /* On i386, the kernel does not know how to return from a signal
-       * handler. Instead, it relies on user space to provide a
-       * restorer function that calls the {rt_,}sigreturn() system call.
-       * Unfortunately, we cannot just reference the glibc version of this
-       * function, as glibc goes out of its way to make it inaccessible.
-       */
-      void (*res)(void);
-      __asm__ __volatile__("call   2f\n"
-                         "0:.align 16\n"
-                         "1:pop    %%eax\n"
-                           "movl   %1,%%eax\n"
-                           "int    $0x80\n"
-                         "2:popl   %0\n"
-                           "addl   $(1b-0b),%0\n"
-                           : "=a" (res)
-                           : "i"  (__NR_sigreturn));
-      return res;
-    }
-  #elif defined(__x86_64__)
-    /* There are no known problems with any of the _syscallX() macros
-     * currently shipping for x86_64, but we still need to be able to define
-     * our own version so that we can override the location of the errno
-     * location (e.g. when using the clone() system call with the CLONE_VM
-     * option).
-     */
-    #undef  LSS_ENTRYPOINT
-    #define LSS_ENTRYPOINT "syscall\n"
-
-    /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
-     * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
-     * sign extension.  We can't cast pointers directly because those are
-     * 32 bits, and gcc will dump ugly warnings about casting from a pointer
-     * to an integer of a different size.
-     */
-    #undef  LSS_SYSCALL_ARG
-    #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
-    #undef  _LSS_RETURN
-    #define _LSS_RETURN(type, res, cast)                                      \
-      do {                                                                    \
-        if ((uint64_t)(res) >= (uint64_t)(-4095)) {                           \
-          LSS_ERRNO = -(res);                                                 \
-          res = -1;                                                           \
-        }                                                                     \
-        return (type)(cast)(res);                                             \
-      } while (0)
-    #undef  LSS_RETURN
-    #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
-
-    #undef  _LSS_BODY
-    #define _LSS_BODY(nr, type, name, cast, ...)                              \
-          long long __res;                                                    \
-          __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT                \
-            : "=a" (__res)                                                    \
-            : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__)                 \
-            : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory");                   \
-          _LSS_RETURN(type, __res, cast)
-    #undef  LSS_BODY
-    #define LSS_BODY(nr, type, name, args...) \
-      _LSS_BODY(nr, type, name, uintptr_t, ## args)
-
-    #undef  LSS_BODY_ASM0
-    #undef  LSS_BODY_ASM1
-    #undef  LSS_BODY_ASM2
-    #undef  LSS_BODY_ASM3
-    #undef  LSS_BODY_ASM4
-    #undef  LSS_BODY_ASM5
-    #undef  LSS_BODY_ASM6
-    #define LSS_BODY_ASM0
-    #define LSS_BODY_ASM1 LSS_BODY_ASM0
-    #define LSS_BODY_ASM2 LSS_BODY_ASM1
-    #define LSS_BODY_ASM3 LSS_BODY_ASM2
-    #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
-    #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
-    #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
-
-    #undef  LSS_BODY_CLOBBER0
-    #undef  LSS_BODY_CLOBBER1
-    #undef  LSS_BODY_CLOBBER2
-    #undef  LSS_BODY_CLOBBER3
-    #undef  LSS_BODY_CLOBBER4
-    #undef  LSS_BODY_CLOBBER5
-    #undef  LSS_BODY_CLOBBER6
-    #define LSS_BODY_CLOBBER0
-    #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
-    #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
-    #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
-    #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
-    #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
-    #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
-
-    #undef  LSS_BODY_ARG0
-    #undef  LSS_BODY_ARG1
-    #undef  LSS_BODY_ARG2
-    #undef  LSS_BODY_ARG3
-    #undef  LSS_BODY_ARG4
-    #undef  LSS_BODY_ARG5
-    #undef  LSS_BODY_ARG6
-    #define LSS_BODY_ARG0()
-    #define LSS_BODY_ARG1(arg1) \
-      LSS_BODY_ARG0(), "D" (arg1)
-    #define LSS_BODY_ARG2(arg1, arg2) \
-      LSS_BODY_ARG1(arg1), "S" (arg2)
-    #define LSS_BODY_ARG3(arg1, arg2, arg3) \
-      LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
-    #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
-      LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
-    #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
-      LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
-    #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
-      LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
-
-    #undef _syscall0
-    #define _syscall0(type,name)                                              \
-      type LSS_NAME(name)() {                                                 \
-        LSS_BODY(0, type, name);                                              \
-      }
-    #undef _syscall1
-    #define _syscall1(type,name,type1,arg1)                                   \
-      type LSS_NAME(name)(type1 arg1) {                                       \
-        LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1));                       \
-      }
-    #undef _syscall2
-    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
-      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
-      }
-    #undef _syscall3
-    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
-                                LSS_SYSCALL_ARG(arg3));                       \
-      }
-    #undef _syscall4
-    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
-                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
-      }
-    #undef _syscall5
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
-                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
-                                LSS_SYSCALL_ARG(arg5));                       \
-      }
-    #undef _syscall6
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5, type6 arg6) {                           \
-        LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
-                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
-                                LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
-      }
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      long long __res;
-      {
-        __asm__ __volatile__(/* if (fn == NULL)
-                              *   return -EINVAL;
-                              */
-                             "testq  %4,%4\n"
-                             "jz     1f\n"
-
-                             /* if (child_stack == NULL)
-                              *   return -EINVAL;
-                              */
-                             "testq  %5,%5\n"
-                             "jz     1f\n"
-
-                             /* Set up alignment of the child stack:
-                              * child_stack = (child_stack & ~0xF) - 16;
-                              */
-                             "andq   $-16,%5\n"
-                             "subq   $16,%5\n"
-
-                             /* Push "arg" and "fn" onto the stack that will be
-                              * used by the child.
-                              */
-                             "movq   %7,8(%5)\n"
-                             "movq   %4,0(%5)\n"
-
-                             /* %rax = syscall(%rax = __NR_clone,
-                              *                %rdi = flags,
-                              *                %rsi = child_stack,
-                              *                %rdx = parent_tidptr,
-                              *                %r8  = new_tls,
-                              *                %r10 = child_tidptr)
-                              */
-                             "movq   %2,%%rax\n"
-                             "movq   %9,%%r8\n"
-                             "movq   %10,%%r10\n"
-                             "syscall\n"
-
-                             /* if (%rax != 0)
-                              *   return;
-                              */
-                             "testq  %%rax,%%rax\n"
-                             "jnz    1f\n"
-
-                             /* In the child. Terminate frame pointer chain.
-                              */
-                             "xorq   %%rbp,%%rbp\n"
-
-                             /* Call "fn(arg)".
-                              */
-                             "popq   %%rax\n"
-                             "popq   %%rdi\n"
-                             "call   *%%rax\n"
-
-                             /* Call _exit(%ebx).
-                              */
-                             "movq   %%rax,%%rdi\n"
-                             "movq   %3,%%rax\n"
-                             "syscall\n"
-
-                             /* Return to parent.
-                              */
-                           "1:\n"
-                             : "=a" (__res)
-                             : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
-                               "r"(LSS_SYSCALL_ARG(fn)),
-                               "S"(LSS_SYSCALL_ARG(child_stack)),
-                               "D"(LSS_SYSCALL_ARG(flags)),
-                               "r"(LSS_SYSCALL_ARG(arg)),
-                               "d"(LSS_SYSCALL_ARG(parent_tidptr)),
-                               "r"(LSS_SYSCALL_ARG(newtls)),
-                               "r"(LSS_SYSCALL_ARG(child_tidptr))
-                             : "rsp", "memory", "r8", "r10", "r11", "rcx");
-      }
-      LSS_RETURN(int, __res);
-    }
-
-    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
-      /* On x86-64, the kernel does not know how to return from
-       * a signal handler. Instead, it relies on user space to provide a
-       * restorer function that calls the rt_sigreturn() system call.
-       * Unfortunately, we cannot just reference the glibc version of this
-       * function, as glibc goes out of its way to make it inaccessible.
-       */
-      long long res;
-      __asm__ __volatile__("call   2f\n"
-                         "0:.align 16\n"
-                         "1:movq   %1,%%rax\n"
-                           "syscall\n"
-                         "2:popq   %0\n"
-                           "addq   $(1b-0b),%0\n"
-                           : "=a" (res)
-                           : "i"  (__NR_rt_sigreturn));
-      return (void (*)(void))(uintptr_t)res;
-    }
-  #elif defined(__arm__)
-    /* Most definitions of _syscallX() neglect to mark "memory" as being
-     * clobbered. This causes problems with compilers, that do a better job
-     * at optimizing across __asm__ calls.
-     * So, we just have to redefine all fo the _syscallX() macros.
-     */
-    #undef LSS_REG
-    #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
-
-    /* r0..r3 are scratch registers and not preserved across function
-     * calls.  We need to first evaluate the first 4 syscall arguments
-     * and store them on stack.  They must be loaded into r0..r3 after
-     * all function calls to avoid r0..r3 being clobbered.
-     */
-    #undef LSS_SAVE_ARG
-    #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
-    #undef LSS_LOAD_ARG
-    #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
-
-    #undef  LSS_BODY
-    #define LSS_BODY(type, name, args...)                                     \
-          register long __res_r0 __asm__("r0");                               \
-          long __res;                                                         \
-          __SYS_REG(name)                                                     \
-          __asm__ __volatile__ (__syscall_safe(name)                          \
-                                : "=r"(__res_r0)                              \
-                                : __SYS_REG_LIST(args)                        \
-                                : "lr", "memory");                            \
-          __res = __res_r0;                                                   \
-          LSS_RETURN(type, __res)
-    #undef _syscall0
-    #define _syscall0(type, name)                                             \
-      type LSS_NAME(name)() {                                                 \
-        LSS_BODY(type, name);                                                 \
-      }
-    #undef _syscall1
-    #define _syscall1(type, name, type1, arg1)                                \
-      type LSS_NAME(name)(type1 arg1) {                                       \
-        /* There is no need for using a volatile temp.  */                    \
-        LSS_REG(0, arg1);                                                     \
-        LSS_BODY(type, name, "r"(__r0));                                      \
-      }
-    #undef _syscall2
-    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
-      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
-      }
-    #undef _syscall3
-    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
-      }
-    #undef _syscall4
-    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4)                                            \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
-        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
-      }
-    #undef _syscall5
-    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4, type5, arg5)                               \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_REG(4, arg5);                                                     \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
-        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
-                             "r"(__r4));                                      \
-      }
-    #undef _syscall6
-    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4, type5, arg5, type6, arg6)                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5, type6 arg6) {                           \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_REG(4, arg5);                                                     \
-        LSS_REG(5, arg6);                                                     \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
-        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
-                             "r"(__r4), "r"(__r5));                           \
-      }
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      register long __res __asm__("r5");
-      {
-        if (fn == NULL || child_stack == NULL) {
-            __res = -EINVAL;
-            goto clone_exit;
-        }
-
-        /* stash first 4 arguments on stack first because we can only load
-         * them after all function calls.
-         */
-        int    tmp_flags = flags;
-        int  * tmp_stack = (int*) child_stack;
-        void * tmp_ptid  = parent_tidptr;
-        void * tmp_tls   = newtls;
-
-        register int  *__ctid  __asm__("r4") = child_tidptr;
-
-        /* Push "arg" and "fn" onto the stack that will be
-         * used by the child.
-         */
-        *(--tmp_stack) = (int) arg;
-        *(--tmp_stack) = (int) fn;
-
-        /* We must load r0..r3 last after all possible function calls.  */
-        register int   __flags __asm__("r0") = tmp_flags;
-        register void *__stack __asm__("r1") = tmp_stack;
-        register void *__ptid  __asm__("r2") = tmp_ptid;
-        register void *__tls   __asm__("r3") = tmp_tls;
-
-        /* %r0 = syscall(%r0 = flags,
-         *               %r1 = child_stack,
-         *               %r2 = parent_tidptr,
-         *               %r3 = newtls,
-         *               %r4 = child_tidptr)
-         */
-        __SYS_REG(clone)
-        __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
-                              *               %r1 = child_stack,
-                              *               %r2 = parent_tidptr,
-                              *               %r3 = newtls,
-                              *               %r4 = child_tidptr)
-                              */
-                             "push  {r7}\n"
-                             "mov   r7,%1\n"
-                             __syscall(clone)"\n"
-
-                             /* if (%r0 != 0)
-                              *   return %r0;
-                              */
-                             "movs  %0,r0\n"
-                             "bne   1f\n"
-
-                             /* In the child, now. Call "fn(arg)".
-                              */
-                             "ldr   r0,[sp, #4]\n"
-                             "mov   lr,pc\n"
-                             "ldr   pc,[sp]\n"
-
-                             /* Call _exit(%r0), which never returns.  We only
-                              * need to set r7 for EABI syscall ABI but we do
-                              * this always to simplify code sharing between
-                              * old and new syscall ABIs.
-                              */
-                             "mov   r7,%2\n"
-                             __syscall(exit)"\n"
-
-                             /* Pop r7 from the stack only in the parent.
-                              */
-                           "1: pop {r7}\n"
-                             : "=r" (__res)
-                             : "r"(__sysreg),
-                               "i"(__NR_exit), "r"(__stack), "r"(__flags),
-                               "r"(__ptid), "r"(__tls), "r"(__ctid)
-                             : "cc", "lr", "memory");
-      }
-      clone_exit:
-      LSS_RETURN(int, __res);
-    }
-  #elif defined(__mips__)
-    #undef LSS_REG
-    #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) =       \
-                                 (unsigned long)(a)
-
-    #if _MIPS_SIM == _MIPS_SIM_ABI32
-    // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html
-    // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html
-    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\
-                                "$13", "$14", "$15", "$24", "$25", "memory"
-    #else
-    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13",     \
-                                "$14", "$15", "$24", "$25", "memory"
-    #endif
-
-    #undef  LSS_BODY
-    #define LSS_BODY(type,name,r7,...)                                        \
-          register unsigned long __v0 __asm__("$2") = __NR_##name;            \
-          __asm__ __volatile__ ("syscall\n"                                   \
-                                : "=&r"(__v0), r7 (__r7)                      \
-                                : "0"(__v0), ##__VA_ARGS__                    \
-                                : MIPS_SYSCALL_CLOBBERS);                     \
-          LSS_RETURN(type, __v0, __r7)
-    #undef _syscall0  /* _syscall0.._syscall4 define LSS_NAME(name) wrappers
-                       * taking 0-4 arguments. Arguments are bound to
-                       * registers with LSS_REG (defined outside this
-                       * excerpt; it has to provide the __rN names used
-                       * below) and passed to LSS_BODY as "r" inputs. The
-                       * 0-3 argument forms declare __r7 themselves as a
-                       * pure output ("=r"); _syscall4 passes arg4 in $7 and
-                       * therefore uses "+r" for it.
-                       */
-    #define _syscall0(type, name)                                             \
-      type LSS_NAME(name)() {                                                 \
-        register unsigned long __r7 __asm__("$7");                            \
-        LSS_BODY(type, name, "=r");                                           \
-      }
-    #undef _syscall1
-    #define _syscall1(type, name, type1, arg1)                                \
-      type LSS_NAME(name)(type1 arg1) {                                       \
-        register unsigned long __r7 __asm__("$7");                            \
-        LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4));              \
-      }
-    #undef _syscall2
-    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
-      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        register unsigned long __r7 __asm__("$7");                            \
-        LSS_REG(4, arg1); LSS_REG(5, arg2);                                   \
-        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5));                     \
-      }
-    #undef _syscall3
-    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        register unsigned long __r7 __asm__("$7");                            \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6));          \
-      }
-    #undef _syscall4
-    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_REG(7, arg4);                                                     \
-        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6));          \
-      }
-    #undef _syscall5
-    #if _MIPS_SIM == _MIPS_SIM_ABI32
-    /* The old 32bit MIPS system call API passes the fifth and sixth argument
-     * on the stack, whereas the new APIs use registers "r8" and "r9".
-     *
-     * The _MIPS_SIM_ABI32 variant below loads arg5 from memory (operand %6),
-     * lowers $29 by 32 bytes, stores arg5 at offset 16 of that area, loads
-     * the syscall number into $2, executes "syscall" and then raises $29
-     * by 32 again. The other variant binds arg5 to $8 with LSS_REG and goes
-     * through LSS_BODY.
-     */
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_REG(7, arg4);                                                     \
-        register unsigned long __v0 __asm__("$2");                            \
-        __asm__ __volatile__ (".set noreorder\n"                              \
-                              "lw    $2, %6\n"                                \
-                              "subu  $29, 32\n"                               \
-                              "sw    $2, 16($29)\n"                           \
-                              "li    $2, %2\n"                                \
-                              "syscall\n"                                     \
-                              "addiu $29, 32\n"                               \
-                              ".set reorder\n"                                \
-                              : "=&r"(__v0), "+r" (__r7)                      \
-                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
-                                "r"(__r6), "m" ((unsigned long)arg5)          \
-                              : MIPS_SYSCALL_CLOBBERS);                       \
-        LSS_RETURN(type, __v0, __r7);                                         \
-      }
-    #else
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_REG(7, arg4); LSS_REG(8, arg5);                                   \
-        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
-                 "r"(__r8));                                                  \
-      }
-    #endif
-    #undef _syscall6
-    #if _MIPS_SIM == _MIPS_SIM_ABI32
-    /* The old 32bit MIPS system call API passes the fifth and sixth argument
-     * on the stack, whereas the new APIs use registers "r8" and "r9".
-     *
-     * The _MIPS_SIM_ABI32 variant below loads arg5 and arg6 from memory
-     * (operands %6 and %7) into $2 and $8, lowers $29 by 32 bytes, stores
-     * them at offsets 16 and 20 of that area, loads the syscall number into
-     * $2, executes "syscall" and then raises $29 by 32 again. The other
-     * variant binds arg5/arg6 to $8/$9 with LSS_REG and goes through
-     * LSS_BODY.
-     */
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5, type6 arg6) {                           \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_REG(7, arg4);                                                     \
-        register unsigned long __v0 __asm__("$2");                            \
-        __asm__ __volatile__ (".set noreorder\n"                              \
-                              "lw    $2, %6\n"                                \
-                              "lw    $8, %7\n"                                \
-                              "subu  $29, 32\n"                               \
-                              "sw    $2, 16($29)\n"                           \
-                              "sw    $8, 20($29)\n"                           \
-                              "li    $2, %2\n"                                \
-                              "syscall\n"                                     \
-                              "addiu $29, 32\n"                               \
-                              ".set reorder\n"                                \
-                              : "=&r"(__v0), "+r" (__r7)                      \
-                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
-                                "r"(__r6), "m" ((unsigned long)arg5),         \
-                                "m" ((unsigned long)arg6)                     \
-                              : MIPS_SYSCALL_CLOBBERS);                       \
-        LSS_RETURN(type, __v0, __r7);                                         \
-      }
-    #else
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5,type6 arg6) {                            \
-        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
-        LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6);                 \
-        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
-                 "r"(__r8), "r"(__r9));                                       \
-      }
-    #endif
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {  /*
-       * MIPS clone wrapper: validates fn and child_stack, stores fn and arg
-       * on the child stack, issues the clone syscall and, in the child,
-       * calls fn(arg) and passes its result to the exit syscall. The
-       * parent (or a failed call) leaves through label "1:" and the result
-       * is produced by LSS_RETURN from $2 (__v0) and $7 (__r7).
-       */
-      register unsigned long __v0 __asm__("$2");
-      register unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
-      {
-        register int   __flags __asm__("$4") = flags;
-        register void *__stack __asm__("$5") = child_stack;
-        register void *__ptid  __asm__("$6") = parent_tidptr;
-        register int  *__ctid  __asm__("$8") = child_tidptr;
-        __asm__ __volatile__(
-          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
-                             "subu  $29,24\n"
-          #elif _MIPS_SIM == _MIPS_SIM_NABI32
-                             "sub   $29,16\n"
-          #else
-                             "dsubu $29,16\n"
-          #endif
-
-                             /* if (fn == NULL || child_stack == NULL)
-                              *   return -EINVAL;
-                              * (%0 is preloaded with -EINVAL before the
-                              * two checks.)
-                              */
-                             "li    %0,%2\n"
-                             "beqz  %5,1f\n"
-                             "beqz  %6,1f\n"
-
-                             /* Push "arg" and "fn" onto the stack that will be
-                              * used by the child. The child stack pointer is
-                              * lowered by 32 bytes first; fn goes to offset 0
-                              * and arg to offset 4 or 8 depending on the ABI.
-                              */
-          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
-                             "subu  %6,32\n"
-                             "sw    %5,0(%6)\n"
-                             "sw    %8,4(%6)\n"
-          #elif _MIPS_SIM == _MIPS_SIM_NABI32
-                             "sub   %6,32\n"
-                             "sw    %5,0(%6)\n"
-                             "sw    %8,8(%6)\n"
-          #else
-                             "dsubu %6,32\n"
-                             "sd    %5,0(%6)\n"
-                             "sd    %8,8(%6)\n"
-          #endif
-
-                             /* $7 = syscall($4 = flags,
-                              *              $5 = child_stack,
-                              *              $6 = parent_tidptr,
-                              *              $7 = newtls,
-                              *              $8 = child_tidptr)
-                              */
-                             "li    $2,%3\n"
-                             "syscall\n"
-
-                             /* if ($7 != 0)
-                              *   return $2;
-                              * A non-zero $2 also branches to "1:".
-                              */
-                             "bnez  $7,1f\n"
-                             "bnez  $2,1f\n"
-
-                             /* In the child, now. Call "fn(arg)".
-                              * fn is reloaded into $25 and arg into $4 from
-                              * the slots written above.
-                              */
-          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
-                            "lw    $25,0($29)\n"
-                            "lw    $4,4($29)\n"
-          #elif _MIPS_SIM == _MIPS_SIM_NABI32
-                            "lw    $25,0($29)\n"
-                            "lw    $4,8($29)\n"
-          #else
-                            "ld    $25,0($29)\n"
-                            "ld    $4,8($29)\n"
-          #endif
-                            "jalr  $25\n"
-
-                             /* Call _exit($2)
-                              */
-                            "move  $4,$2\n"
-                            "li    $2,%4\n"
-                            "syscall\n"
-
-                           "1:\n"
-          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
-                             "addu  $29, 24\n"
-          #elif _MIPS_SIM == _MIPS_SIM_NABI32
-                             "add   $29, 16\n"
-          #else
-                             "daddu $29,16\n"
-          #endif
-                             : "=&r" (__v0), "=r" (__r7)
-                             : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
-                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
-                               "r"(__ptid), "r"(__r7), "r"(__ctid)
-                             : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
-                               "$24", "memory");
-      }
-      LSS_RETURN(int, __v0, __r7);
-    }
-  #elif defined (__PPC__)
-    #undef  LSS_LOADARGS_0  /* LSS_LOADARGS_<n>(name, args...) assign the
-                             * syscall number to __sc_0 and the first n
-                             * arguments to __sc_3..__sc_8, each form
-                             * chaining to the one with one argument less.
-                             * LSS_ASMINPUT_<n> build the matching asm input
-                             * list, tying each value to the output operand
-                             * with the same index ("0".."6"). The __sc_*
-                             * variables are declared by LSS_BODY.
-                             */
-    #define LSS_LOADARGS_0(name, dummy...)                                    \
-        __sc_0 = __NR_##name
-    #undef  LSS_LOADARGS_1
-    #define LSS_LOADARGS_1(name, arg1)                                        \
-            LSS_LOADARGS_0(name);                                             \
-            __sc_3 = (unsigned long) (arg1)
-    #undef  LSS_LOADARGS_2
-    #define LSS_LOADARGS_2(name, arg1, arg2)                                  \
-            LSS_LOADARGS_1(name, arg1);                                       \
-            __sc_4 = (unsigned long) (arg2)
-    #undef  LSS_LOADARGS_3
-    #define LSS_LOADARGS_3(name, arg1, arg2, arg3)                            \
-            LSS_LOADARGS_2(name, arg1, arg2);                                 \
-            __sc_5 = (unsigned long) (arg3)
-    #undef  LSS_LOADARGS_4
-    #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4)                      \
-            LSS_LOADARGS_3(name, arg1, arg2, arg3);                           \
-            __sc_6 = (unsigned long) (arg4)
-    #undef  LSS_LOADARGS_5
-    #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5)                \
-            LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4);                     \
-            __sc_7 = (unsigned long) (arg5)
-    #undef  LSS_LOADARGS_6
-    #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6)          \
-            LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5);               \
-            __sc_8 = (unsigned long) (arg6)
-    #undef  LSS_ASMINPUT_0
-    #define LSS_ASMINPUT_0 "0" (__sc_0)
-    #undef  LSS_ASMINPUT_1
-    #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
-    #undef  LSS_ASMINPUT_2
-    #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
-    #undef  LSS_ASMINPUT_3
-    #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
-    #undef  LSS_ASMINPUT_4
-    #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
-    #undef  LSS_ASMINPUT_5
-    #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
-    #undef  LSS_ASMINPUT_6
-    #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
-    #undef  LSS_BODY  /* LSS_BODY(nr, type, name, args...): shared PPC syscall
-                       * body for nr arguments. Declares __sc_0 (r0) and
-                       * __sc_3..__sc_8 (r3..r8), fills them through
-                       * LSS_LOADARGS_##nr, executes "sc" and then copies
-                       * the condition register into __sc_0 with "mfcr".
-                       * r3 becomes __sc_ret and r0 becomes __sc_err; both
-                       * are handed to LSS_RETURN. All seven registers are
-                       * early-clobber outputs; r9-r12, cr0, ctr and memory
-                       * are listed as clobbered.
-                       */
-    #define LSS_BODY(nr, type, name, args...)                                 \
-        long __sc_ret, __sc_err;                                              \
-        {                                                                     \
-            register unsigned long __sc_0 __asm__ ("r0");                     \
-            register unsigned long __sc_3 __asm__ ("r3");                     \
-            register unsigned long __sc_4 __asm__ ("r4");                     \
-            register unsigned long __sc_5 __asm__ ("r5");                     \
-            register unsigned long __sc_6 __asm__ ("r6");                     \
-            register unsigned long __sc_7 __asm__ ("r7");                     \
-            register unsigned long __sc_8 __asm__ ("r8");                     \
-                                                                              \
-            LSS_LOADARGS_##nr(name, args);                                    \
-            __asm__ __volatile__                                              \
-                ("sc\n\t"                                                     \
-                 "mfcr %0"                                                    \
-                 : "=&r" (__sc_0),                                            \
-                   "=&r" (__sc_3), "=&r" (__sc_4),                            \
-                   "=&r" (__sc_5), "=&r" (__sc_6),                            \
-                   "=&r" (__sc_7), "=&r" (__sc_8)                             \
-                 : LSS_ASMINPUT_##nr                                          \
-                 : "cr0", "ctr", "memory",                                    \
-                   "r9", "r10", "r11", "r12");                                \
-            __sc_ret = __sc_3;                                                \
-            __sc_err = __sc_0;                                                \
-        }                                                                     \
-        LSS_RETURN(type, __sc_ret, __sc_err)
-    #undef _syscall0  /* _syscall0.._syscall6 define LSS_NAME(name) wrappers
-                       * for 0-6 arguments. Each one only forwards its
-                       * argument count, return type, syscall name and
-                       * arguments to LSS_BODY.
-                       */
-    #define _syscall0(type, name)                                             \
-       type LSS_NAME(name)(void) {                                            \
-          LSS_BODY(0, type, name);                                            \
-       }
-    #undef _syscall1
-    #define _syscall1(type, name, type1, arg1)                                \
-       type LSS_NAME(name)(type1 arg1) {                                      \
-          LSS_BODY(1, type, name, arg1);                                      \
-       }
-    #undef _syscall2
-    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
-       type LSS_NAME(name)(type1 arg1, type2 arg2) {                          \
-          LSS_BODY(2, type, name, arg1, arg2);                                \
-       }
-    #undef _syscall3
-    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {              \
-          LSS_BODY(3, type, name, arg1, arg2, arg3);                          \
-       }
-    #undef _syscall4
-    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4)                                \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {  \
-          LSS_BODY(4, type, name, arg1, arg2, arg3, arg4);                    \
-       }
-    #undef _syscall5
-    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4, type5, arg5)                   \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
-                                               type5 arg5) {                  \
-          LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5);              \
-       }
-    #undef _syscall6
-    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4, type5, arg5, type6, arg6)      \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
-                                               type5 arg5, type6 arg6) {      \
-          LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6);        \
-       }
-    /* clone function adapted from glibc 2.18 clone.S
-     *
-     * Two implementations follow, selected by __PPC64__. Both reject a NULL
-     * fn or child_stack, align the child stack to 16 bytes and write a zero
-     * word with a store-with-update at its new top, keep fn, child_stack
-     * and arg in r28, r29 and r27 across the syscall, issue the clone
-     * syscall and, in the child, call fn(arg) and pass the result to the
-     * exit syscall. The parent leaves through label "1:" and the result is
-     * produced by LSS_RETURN from __ret and __err.
-     *
-     * NOTE(review): in the __PPC64__ branch no instruction writes operand
-     * %1 (__err) -- only the 32-bit branch has "mfcr %1". Confirm that
-     * __err is meant to be left unset there.
-     */
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      long __ret, __err;
-      {
-#if defined(__PPC64__)
-
-/* Stack frame offsets: minimum frame size and TOC save slot, chosen by
- * _CALL_ELF.
- */
-#if _CALL_ELF != 2
-#define FRAME_MIN_SIZE         112
-#define FRAME_TOC_SAVE         40
-#else
-#define FRAME_MIN_SIZE         32
-#define FRAME_TOC_SAVE         24
-#endif
-
-
-        register int (*__fn)(void *) __asm__ ("r3") = fn;
-        register void *__cstack      __asm__ ("r4") = child_stack;
-        register int __flags         __asm__ ("r5") = flags;
-        register void * __arg        __asm__ ("r6") = arg;
-        register int * __ptidptr     __asm__ ("r7") = parent_tidptr;
-        register void * __newtls     __asm__ ("r8") = newtls;
-        register int * __ctidptr     __asm__ ("r9") = child_tidptr;
-        __asm__ __volatile__(
-            /* check for fn == NULL
-             * and child_stack == NULL
-             */
-            "cmpdi cr0, %6, 0\n\t"
-            "cmpdi cr1, %7, 0\n\t"
-            "cror  cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
-            "beq-  cr0, 1f\n\t"
-
-            /* set up stack frame for child                                  */
-            "clrrdi %7, %7, 4\n\t"
-            "li     0, 0\n\t"
-            "stdu   0, -%13(%7)\n\t"
-
-            /* fn, arg, child_stack are saved across the syscall: r28, r27,
-             * r29
-             */
-            "mr 28, %6\n\t"
-            "mr 29, %7\n\t"
-            "mr 27, %9\n\t"
-
-            /* syscall
-               r3 == flags
-               r4 == child_stack
-               r5 == parent_tidptr
-               r6 == newtls
-               r7 == child_tidptr                                            */
-            "mr 3, %8\n\t"
-            "mr 5, %10\n\t"
-            "mr 6, %11\n\t"
-            "mr 7, %12\n\t"
-	    "li	0, %4\n\t"
-            "sc\n\t"
-
-            /* Test if syscall was successful                                */
-            "cmpdi  cr1, 3, 0\n\t"
-            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
-            "bne-   cr1, 1f\n\t"
-
-            /* Do the function call                                          */
-            "std   2, %14(1)\n\t"
-#if _CALL_ELF != 2
-	    "ld    0, 0(28)\n\t"
-	    "ld    2, 8(28)\n\t"
-            "mtctr 0\n\t"
-#else
-            "mr    12, 28\n\t"
-            "mtctr 12\n\t"
-#endif
-            "mr    3, 27\n\t"
-            "bctrl\n\t"
-	    "ld    2, %14(1)\n\t"
-
-            /* Call _exit(r3)                                                */
-            "li 0, %5\n\t"
-            "sc\n\t"
-
-            /* Return to parent                                              */
-	    "1:\n\t"
-            "mr %0, 3\n\t"
-              : "=r" (__ret), "=r" (__err)
-              : "0" (-1), "i" (EINVAL),
-                "i" (__NR_clone), "i" (__NR_exit),
-                "r" (__fn), "r" (__cstack), "r" (__flags),
-                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
-                "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
-              : "cr0", "cr1", "memory", "ctr",
-                "r0", "r29", "r27", "r28");
-#else
-        register int (*__fn)(void *)    __asm__ ("r8")  = fn;
-        register void *__cstack                 __asm__ ("r4")  = child_stack;
-        register int __flags                    __asm__ ("r3")  = flags;
-        register void * __arg                   __asm__ ("r9")  = arg;
-        register int * __ptidptr                __asm__ ("r5")  = parent_tidptr;
-        register void * __newtls                __asm__ ("r6")  = newtls;
-        register int * __ctidptr                __asm__ ("r7")  = child_tidptr;
-        __asm__ __volatile__(
-            /* check for fn == NULL
-             * and child_stack == NULL
-             */
-            "cmpwi cr0, %6, 0\n\t"
-            "cmpwi cr1, %7, 0\n\t"
-            "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
-            "beq- cr0, 1f\n\t"
-
-            /* set up stack frame for child                                  */
-            "clrrwi %7, %7, 4\n\t"
-            "li 0, 0\n\t"
-            "stwu 0, -16(%7)\n\t"
-
-            /* fn, arg, child_stack are saved across the syscall: r28, r27,
-             * r29
-             */
-            "mr 28, %6\n\t"
-            "mr 29, %7\n\t"
-            "mr 27, %9\n\t"
-
-            /* syscall                                                       */
-            "li 0, %4\n\t"
-            /* flags already in r3
-             * child_stack already in r4
-             * ptidptr already in r5
-             * newtls already in r6
-             * ctidptr already in r7
-             */
-            "sc\n\t"
-
-            /* Test if syscall was successful                                */
-            "cmpwi cr1, 3, 0\n\t"
-            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
-            "bne- cr1, 1f\n\t"
-
-            /* Do the function call                                          */
-            "mtctr 28\n\t"
-            "mr 3, 27\n\t"
-            "bctrl\n\t"
-
-            /* Call _exit(r3)                                                */
-            "li 0, %5\n\t"
-            "sc\n\t"
-
-            /* Return to parent                                              */
-            "1:\n"
-            "mfcr %1\n\t"
-            "mr %0, 3\n\t"
-              : "=r" (__ret), "=r" (__err)
-              : "0" (-1), "1" (EINVAL),
-                "i" (__NR_clone), "i" (__NR_exit),
-                "r" (__fn), "r" (__cstack), "r" (__flags),
-                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
-                "r" (__ctidptr)
-              : "cr0", "cr1", "memory", "ctr",
-                "r0", "r29", "r27", "r28");
-
-#endif
-      }
-      LSS_RETURN(int, __ret, __err);
-    }
-  #elif defined(__aarch64__)
-    #undef LSS_REG  /* LSS_REG(r,a) declares a long named __x<r>, pinned to
-                     * register x<r> and initialised with (long)a.
-                     * LSS_BODY(type,name,args...) moves the immediate
-                     * __NR_##name into x8, executes "svc 0x0", reads the
-                     * result from x0 and hands it to LSS_RETURN; "args" are
-                     * appended to the asm input operand list.
-                     * NOTE(review): x8 is written by the asm template but
-                     * the clobber list only names "memory" -- confirm this
-                     * is intended.
-                     */
-    #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a
-    #undef  LSS_BODY
-    #define LSS_BODY(type,name,args...)                                       \
-          register long __res_x0 __asm__("x0");                               \
-          long __res;                                                         \
-          __asm__ __volatile__ ("mov x8, %1\n"                                \
-                                "svc 0x0\n"                                   \
-                                : "=r"(__res_x0)                              \
-                                : "i"(__NR_##name) , ## args                  \
-                                : "memory");                                  \
-          __res = __res_x0;                                                   \
-          LSS_RETURN(type, __res)
-    #undef _syscall0  /* _syscall0.._syscall5 define LSS_NAME(name) wrappers
-                       * for 0-5 arguments. Each argument is bound to
-                       * x0..x4 with LSS_REG and passed to LSS_BODY as an
-                       * "r" input operand.
-                       */
-    #define _syscall0(type, name)                                             \
-      type LSS_NAME(name)(void) {                                             \
-        LSS_BODY(type, name);                                                 \
-      }
-    #undef _syscall1
-    #define _syscall1(type, name, type1, arg1)                                \
-      type LSS_NAME(name)(type1 arg1) {                                       \
-        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0));                    \
-      }
-    #undef _syscall2
-    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
-      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
-        LSS_BODY(type, name, "r"(__x0), "r"(__x1));                           \
-      }
-    #undef _syscall3
-    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2));                \
-      }
-    #undef _syscall4
-    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4);                                                     \
-        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3));     \
-      }
-    #undef _syscall5
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5) {                                       \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
-        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3),      \
-                             "r"(__x4));                                      \
-      }
-    #undef _syscall6  /* _syscall6 defines the six-argument LSS_NAME(name)
-                       * wrapper. The arguments are bound to x0..x5 with
-                       * LSS_REG and every one of them is passed to LSS_BODY
-                       * as a general-purpose "r" input, matching the
-                       * smaller _syscallN forms.
-                       */
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
-      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
-                          type5 arg5, type6 arg6) {                           \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
-        LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3),      \
-                             "r"(__x4), "r"(__x5));                           \
-      }
-    /* clone function adapted from glibc 2.18 clone.S
-     *
-     * Rejects a NULL fn or child_stack, pushes fn and arg onto the child
-     * stack, moves flags/ptid/tls/ctid into x0/x2/x3/x4, loads the clone
-     * syscall number into x8 and executes "svc 0x0". When x0 is zero
-     * afterwards the code pops fn and arg from the new stack, calls
-     * fn(arg) and passes the result to the exit syscall; otherwise it
-     * branches to label "2:".
-     *
-     * NOTE(review): the "1:" path moves -EINVAL into x8, and __res is a
-     * plain "=r" output that no instruction in the template writes --
-     * confirm how x0 / -EINVAL are meant to reach __res.
-     */
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      long __res;
-      {
-        register int (*__fn)(void *)  __asm__("x0") = fn;
-        register void *__stack __asm__("x1") = child_stack;
-        register int   __flags __asm__("x2") = flags;
-        register void *__arg   __asm__("x3") = arg;
-        register int  *__ptid  __asm__("x4") = parent_tidptr;
-        register void *__tls   __asm__("x5") = newtls;
-        register int  *__ctid  __asm__("x6") = child_tidptr;
-        __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
-                              *   return -EINVAL;
-                              */
-                             "cbz     x0,1f\n"
-                             "cbz     x1,1f\n"
-
-                             /* Push "arg" and "fn" onto the stack that will be
-                              * used by the child (pre-decrementing x1 by 16).
-                              */
-                             "stp x0,x3, [x1, #-16]!\n"
-
-                             "mov x0,x2\n" /* flags  */
-                             "mov x2,x4\n" /* ptid  */
-                             "mov x3,x5\n" /* tls */
-                             "mov x4,x6\n" /* ctid */
-                             "mov x8,%9\n" /* clone */
-
-                             "svc 0x0\n"
-
-                             /* if (%r0 != 0)
-                              *   return %r0;
-                              */
-                             "cmp x0, #0\n"
-                             "bne 2f\n"
-
-                             /* In the child, now. Call "fn(arg)".
-                              * fn is popped into x1 and arg into x0.
-                              */
-                             "ldp x1, x0, [sp], #16\n"
-                             "blr x1\n"
-
-                             /* Call _exit(%r0).
-                              */
-                             "mov x8, %10\n"
-                             "svc 0x0\n"
-                           "1:\n"
-                             "mov x8, %1\n"
-                           "2:\n"
-                             : "=r" (__res)
-                             : "i"(-EINVAL),
-                               "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg),
-                               "r"(__ptid), "r"(__tls), "r"(__ctid),
-                               "i"(__NR_clone), "i"(__NR_exit)
-                             : "x30", "memory");
-      }
-      LSS_RETURN(int, __res);
-    }
-  #elif defined(__s390__)
-    #undef  LSS_REG  /* LSS_REG(r, a) declares an unsigned long named __r<r>,
-                      * pinned to register r<r> and initialised with
-                      * (unsigned long) a. LSS_BODY(type, name, args...)
-                      * places __NR_##name in r1, executes "svc 0", reads the
-                      * result from r2 and hands it to LSS_RETURN; "args" are
-                      * appended to the asm input operand list.
-                      */
-    #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a
-    #undef  LSS_BODY
-    #define LSS_BODY(type, name, args...)                                     \
-        register unsigned long __nr __asm__("r1")                             \
-            = (unsigned long)(__NR_##name);                                   \
-        register long __res_r2 __asm__("r2");                                 \
-        long __res;                                                           \
-        __asm__ __volatile__                                                  \
-            ("svc 0\n\t"                                                      \
-             : "=d"(__res_r2)                                                 \
-             : "d"(__nr), ## args                                             \
-             : "memory");                                                     \
-        __res = __res_r2;                                                     \
-        LSS_RETURN(type, __res)
-    #undef _syscall0  /* _syscall0.._syscall6 define LSS_NAME(name) wrappers
-                       * for 0-6 arguments. Arguments are bound to r2..r7
-                       * with LSS_REG; the first one is tied to output
-                       * operand 0 (the r2 result register) with the "0"
-                       * constraint, the remaining ones are plain "d"
-                       * inputs.
-                       */
-    #define _syscall0(type, name)                                             \
-       type LSS_NAME(name)(void) {                                            \
-          LSS_BODY(type, name);                                               \
-       }
-    #undef _syscall1
-    #define _syscall1(type, name, type1, arg1)                                \
-       type LSS_NAME(name)(type1 arg1) {                                      \
-          LSS_REG(2, arg1);                                                   \
-          LSS_BODY(type, name, "0"(__r2));                                    \
-       }
-    #undef _syscall2
-    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
-       type LSS_NAME(name)(type1 arg1, type2 arg2) {                          \
-          LSS_REG(2, arg1); LSS_REG(3, arg2);                                 \
-          LSS_BODY(type, name, "0"(__r2), "d"(__r3));                         \
-       }
-    #undef _syscall3
-    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {              \
-          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
-          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4));              \
-       }
-    #undef _syscall4
-    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4)                                \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
-                           type4 arg4) {                                      \
-          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
-          LSS_REG(5, arg4);                                                   \
-          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
-                               "d"(__r5));                                    \
-       }
-    #undef _syscall5
-    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4, type5, arg5)                   \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
-                           type4 arg4, type5 arg5) {                          \
-          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
-          LSS_REG(5, arg4); LSS_REG(6, arg5);                                 \
-          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
-                               "d"(__r5), "d"(__r6));                         \
-       }
-    #undef _syscall6
-    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                                  type4, arg4, type5, arg5, type6, arg6)      \
-       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
-                           type4 arg4, type5 arg5, type6 arg6) {              \
-          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
-          LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6);               \
-          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
-                               "d"(__r5), "d"(__r6), "d"(__r7));              \
-       }
-    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
-                                   int flags, void *arg, int *parent_tidptr,
-                                   void *newtls, int *child_tidptr) {
-      long __ret;
-      {
-        register int  (*__fn)(void *)    __asm__ ("r1")  = fn;
-        register void  *__cstack         __asm__ ("r2")  = child_stack;
-        register int    __flags          __asm__ ("r3")  = flags;
-        register void  *__arg            __asm__ ("r0")  = arg;
-        register int   *__ptidptr        __asm__ ("r4")  = parent_tidptr;
-        register void  *__newtls         __asm__ ("r6")  = newtls;
-        register int   *__ctidptr        __asm__ ("r5")  = child_tidptr;
-        __asm__ __volatile__ (
-    #ifndef __s390x__
-                                  /* arg already in r0 */
-          "ltr %4, %4\n\t"        /* check fn, which is already in r1 */
-          "jz 1f\n\t"             /* NULL function pointer, return -EINVAL */
-          "ltr %5, %5\n\t"        /* check child_stack, which is already in r2 */
-          "jz 1f\n\t"             /* NULL stack pointer, return -EINVAL */
-                                  /* flags already in r3 */
-                                  /* parent_tidptr already in r4 */
-                                  /* child_tidptr already in r5 */
-                                  /* newtls already in r6 */
-          "svc %2\n\t"            /* invoke clone syscall */
-          "ltr %0,%%r2\n\t"       /* load return code into __ret and test */
-          "jnz 1f\n\t"            /* return to parent if non-zero */
-                                  /* start child thread */
-          "lr %%r2, %7\n\t"       /* set first parameter to void *arg */
-          "ahi %%r15, -96\n\t"    /* make room on the stack for the save area */
-          "xc 0(4,%%r15), 0(%%r15)\n\t"
-          "basr %%r14, %4\n\t"    /* jump to fn */
-          "svc %3\n"              /* invoke exit syscall */
-          "1:\n"
-    #else
-                                  /* arg already in r0 */
-          "ltgr %4, %4\n\t"       /* check fn, which is already in r1 */
-          "jz 1f\n\t"             /* NULL function pointer, return -EINVAL */
-          "ltgr %5, %5\n\t"       /* check child_stack, which is already in r2 */
-          "jz 1f\n\t"             /* NULL stack pointer, return -EINVAL */
-                                  /* flags already in r3 */
-                                  /* parent_tidptr already in r4 */
-                                  /* child_tidptr already in r5 */
-                                  /* newtls already in r6 */
-          "svc %2\n\t"            /* invoke clone syscall */
-          "ltgr %0, %%r2\n\t"     /* load return code into __ret and test */
-          "jnz 1f\n\t"            /* return to parent if non-zero */
-                                  /* start child thread */
-          "lgr %%r2, %7\n\t"      /* set first parameter to void *arg */
-          "aghi %%r15, -160\n\t"  /* make room on the stack for the save area */
-          "xc 0(8,%%r15), 0(%%r15)\n\t"
-          "basr %%r14, %4\n\t"    /* jump to fn */
-          "svc %3\n"              /* invoke exit syscall */
-          "1:\n"
-    #endif
-          : "=r" (__ret)
-          : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit),
-            "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg),
-            "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr)
-          : "cc", "r14", "memory"
-        );
-      }
-      LSS_RETURN(int, __ret);
-    }
-  #endif
-  #define __NR__exit   __NR_exit
-  #define __NR__gettid __NR_gettid
-  #define __NR__mremap __NR_mremap
-  LSS_INLINE _syscall1(int,     close,           int,         f)
-  LSS_INLINE _syscall1(int,     _exit,           int,         e)
-  LSS_INLINE _syscall3(int,     fcntl,           int,         f,
-                       int,            c, long,   a)
-  LSS_INLINE _syscall2(int,     fstat,           int,         f,
-                      struct kernel_stat*,   b)
-  LSS_INLINE _syscall6(int,     futex,           int*,        a,
-                       int,            o, int,    v,
-                      struct kernel_timespec*, t,
-                       int*, a2,
-                       int, v3)
-#ifdef __NR_getdents64
-    LSS_INLINE _syscall3(int,     getdents64,      int,         f,
-                         struct kernel_dirent64*, d, int,    c)
-#define KERNEL_DIRENT kernel_dirent64
-#define GETDENTS sys_getdents64
-#else
-    LSS_INLINE _syscall3(int,     getdents,        int,         f,
-                         struct kernel_dirent*, d, int,    c)
-#define KERNEL_DIRENT kernel_dirent
-#define GETDENTS sys_getdents
-#endif
-  LSS_INLINE _syscall0(pid_t,   getpid)
-  LSS_INLINE _syscall0(pid_t,   getppid)
-  LSS_INLINE _syscall0(pid_t,   _gettid)
-  LSS_INLINE _syscall2(int,     kill,            pid_t,       p,
-                       int,            s)
-  #if defined(__x86_64__)
-    /* Need to make sure off_t isn't truncated to 32-bits under x32.  */
-    LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
-      _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
-                                        LSS_SYSCALL_ARG(w));
-    }
-  #else
-    LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
-                         off_t,          o, int,    w)
-  #endif
-  LSS_INLINE _syscall2(int,     munmap,          void*,       s,
-                       size_t,         l)
-  LSS_INLINE _syscall5(void*,   _mremap,         void*,       o,
-                       size_t,         os,       size_t,      ns,
-                       unsigned long,  f, void *, a)
-  LSS_INLINE _syscall2(int,     prctl,           int,         o,
-                       long,           a)
-  LSS_INLINE _syscall4(long,    ptrace,          int,         r,
-                       pid_t,          p, void *, a, void *, d)
-  LSS_INLINE _syscall3(ssize_t, read,            int,         f,
-                       void *,         b, size_t, c)
-  LSS_INLINE _syscall4(int,     rt_sigaction,    int,         s,
-                       const struct kernel_sigaction*, a,
-                       struct kernel_sigaction*, o, size_t,   c)
-  LSS_INLINE _syscall4(int, rt_sigprocmask,      int,         h,
-                       const struct kernel_sigset_t*,  s,
-                       struct kernel_sigset_t*,        o, size_t, c);
-  LSS_INLINE _syscall0(int,     sched_yield)
-  LSS_INLINE _syscall2(int,     sigaltstack,     const stack_t*, s,
-                       const stack_t*, o)
-  #if defined(__NR_fstatat)
-    LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p,
-                         struct kernel_stat*,   b, int, flags)
-    LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) {
-      return LSS_NAME(fstatat)(AT_FDCWD,p,b,0);
-  }
-  #else
-    LSS_INLINE _syscall2(int,     stat,            const char*, f,
-                         struct kernel_stat*,   b)
-  #endif
-  LSS_INLINE _syscall3(ssize_t, write,            int,        f,
-                       const void *,   b, size_t, c)
-  #if defined(__NR_getcpu)
-    LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
-                         unsigned *, node, void *, unused);
-  #endif
-  #if defined(__x86_64__) || defined(__aarch64__) || \
-     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
-    LSS_INLINE _syscall3(int, socket,             int,   d,
-                         int,                     t, int,       p)
-  #endif
-  #if defined(__x86_64__) || defined(__s390x__)
-    LSS_INLINE int LSS_NAME(sigaction)(int signum,
-                                       const struct kernel_sigaction *act,
-                                       struct kernel_sigaction *oldact) {
-      #if defined(__x86_64__)
-      /* On x86_64, the kernel requires us to always set our own
-       * SA_RESTORER in order to be able to return from a signal handler.
-       * This function must have a "magic" signature that the "gdb"
-       * (and maybe the kernel?) can recognize.
-       */
-      if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
-        struct kernel_sigaction a = *act;
-        a.sa_flags   |= SA_RESTORER;
-        a.sa_restorer = LSS_NAME(restore_rt)();
-        return LSS_NAME(rt_sigaction)(signum, &a, oldact,
-                                      (KERNEL_NSIG+7)/8);
-      } else
-      #endif
-        return LSS_NAME(rt_sigaction)(signum, act, oldact,
-                                      (KERNEL_NSIG+7)/8);
-    }
-
-    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
-                                         const struct kernel_sigset_t *set,
-                                         struct kernel_sigset_t *oldset) {
-      return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
-    }
-  #endif
-  #if (defined(__aarch64__)) || \
-      (defined(__mips__) && (_MIPS_ISA == _MIPS_ISA_MIPS64))
-    LSS_INLINE int LSS_NAME(sigaction)(int signum,
-                                       const struct kernel_sigaction *act,
-                                       struct kernel_sigaction *oldact) {
-        return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8);
-
-    }
-    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
-                                         const struct kernel_sigset_t *set,
-                                         struct kernel_sigset_t *oldset) {
-      return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
-    }
-  #endif
-  #ifdef __NR_wait4
-    LSS_INLINE _syscall4(pid_t, wait4,            pid_t, p,
-                         int*,                    s, int,       o,
-                         struct kernel_rusage*,   r)
-    LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
-      return LSS_NAME(wait4)(pid, status, options, 0);
-    }
-  #else
-    LSS_INLINE _syscall3(pid_t, waitpid,          pid_t, p,
-                         int*,              s,    int,   o)
-  #endif
-  #ifdef __NR_openat
-    LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
-    LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) {
-      return LSS_NAME(openat)(AT_FDCWD,p,f,m );
-    }
-  #else
-  LSS_INLINE _syscall3(int,     open,            const char*, p,
-                       int,            f, int,    m)
-  #endif
-  LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
-    memset(&set->sig, 0, sizeof(set->sig));
-    return 0;
-  }
-
-  LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
-    memset(&set->sig, -1, sizeof(set->sig));
-    return 0;
-  }
-
-  LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
-                                     int signum) {
-    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
-      LSS_ERRNO = EINVAL;
-      return -1;
-    } else {
-      set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
-          |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
-      return 0;
-    }
-  }
-
-  LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
-                                        int signum) {
-    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
-      LSS_ERRNO = EINVAL;
-      return -1;
-    } else {
-      set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
-          &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
-      return 0;
-    }
-  }
-
-  #if defined(__i386__) ||                                                    \
-      defined(__arm__) ||                                                     \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) ||                   \
-      defined(__PPC__) ||                                                     \
-     (defined(__s390__) && !defined(__s390x__))
-    #define __NR__sigaction   __NR_sigaction
-    #define __NR__sigprocmask __NR_sigprocmask
-    LSS_INLINE _syscall2(int, fstat64,             int, f,
-                         struct kernel_stat64 *, b)
-    LSS_INLINE _syscall5(int, _llseek,     uint, fd, ulong, hi, ulong, lo,
-                         loff_t *, res, uint, wh)
-#if defined(__s390__) && !defined(__s390x__)
-    /* On s390, mmap2() arguments are passed in memory. */
-    LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d,
-                                      off_t o) {
-      unsigned long buf[6] = { (unsigned long) s, (unsigned long) l,
-                               (unsigned long) p, (unsigned long) f,
-                               (unsigned long) d, (unsigned long) o };
-      LSS_REG(2, buf);
-      LSS_BODY(void*, mmap2, "0"(__r2));
-    }
-#elif !defined(__PPC64__)
-    #define __NR__mmap2 __NR_mmap2
-    LSS_INLINE _syscall6(void*, _mmap2,            void*, s,
-                         size_t,                   l, int,               p,
-                         int,                      f, int,               d,
-                         off_t,                    o)
-#endif
-    LSS_INLINE _syscall3(int,   _sigaction,        int,   s,
-                         const struct kernel_old_sigaction*,  a,
-                         struct kernel_old_sigaction*,        o)
-    LSS_INLINE _syscall3(int,   _sigprocmask,      int,   h,
-                         const unsigned long*,     s,
-                         unsigned long*,           o)
-    LSS_INLINE _syscall2(int, stat64,              const char *, p,
-                         struct kernel_stat64 *, b)
-
-    LSS_INLINE int LSS_NAME(sigaction)(int signum,
-                                       const struct kernel_sigaction *act,
-                                       struct kernel_sigaction *oldact) {
-      int old_errno = LSS_ERRNO;
-      int rc;
-      struct kernel_sigaction a;
-      if (act != NULL) {
-        a             = *act;
-        #ifdef __i386__
-        /* On i386, the kernel requires us to always set our own
-         * SA_RESTORER when using realtime signals. Otherwise, it does not
-         * know how to return from a signal handler. This function must have
-         * a "magic" signature that the "gdb" (and maybe the kernel?) can
-         * recognize.
-         * Apparently, a SA_RESTORER is implicitly set by the kernel, when
-         * using non-realtime signals.
-         *
-         * TODO: Test whether ARM needs a restorer
-         */
-        if (!(a.sa_flags & SA_RESTORER)) {
-          a.sa_flags   |= SA_RESTORER;
-          a.sa_restorer = (a.sa_flags & SA_SIGINFO)
-                          ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
-        }
-        #endif
-      }
-      rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
-                                  (KERNEL_NSIG+7)/8);
-      if (rc < 0 && LSS_ERRNO == ENOSYS) {
-        struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
-        if (!act) {
-          ptr_a            = NULL;
-        } else {
-          oa.sa_handler_   = act->sa_handler_;
-          memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
-          #ifndef __mips__
-          oa.sa_restorer   = act->sa_restorer;
-          #endif
-          oa.sa_flags      = act->sa_flags;
-        }
-        if (!oldact) {
-          ptr_oa           = NULL;
-        }
-        LSS_ERRNO = old_errno;
-        rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
-        if (rc == 0 && oldact) {
-          if (act) {
-            memcpy(oldact, act, sizeof(*act));
-          } else {
-            memset(oldact, 0, sizeof(*oldact));
-          }
-          oldact->sa_handler_    = ptr_oa->sa_handler_;
-          oldact->sa_flags       = ptr_oa->sa_flags;
-          memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
-          #ifndef __mips__
-          oldact->sa_restorer    = ptr_oa->sa_restorer;
-          #endif
-        }
-      }
-      return rc;
-    }
-
-    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
-                                         const struct kernel_sigset_t *set,
-                                         struct kernel_sigset_t *oldset) {
-      int olderrno = LSS_ERRNO;
-      int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
-      if (rc < 0 && LSS_ERRNO == ENOSYS) {
-        LSS_ERRNO = olderrno;
-        if (oldset) {
-          LSS_NAME(sigemptyset)(oldset);
-        }
-        rc = LSS_NAME(_sigprocmask)(how,
-                                    set ? &set->sig[0] : NULL,
-                                    oldset ? &oldset->sig[0] : NULL);
-      }
-      return rc;
-    }
-  #endif
-  #if defined(__i386__) ||                                                    \
-      defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) ||                     \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) ||                   \
-     (defined(__PPC__) && !defined(__PPC64__)) ||                             \
-     (defined(__s390__) && !defined(__s390x__))
-    /* On these architectures, implement mmap() with mmap2(). */
-    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
-                                    int64_t o) {
-      if (o % 4096) {
-        LSS_ERRNO = EINVAL;
-        return (void *) -1;
-      }
-      return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096));
-    }
-  #elif defined(__s390x__)
-    /* On s390x, mmap() arguments are passed in memory. */
-    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
-                                    int64_t o) {
-      unsigned long buf[6] = { (unsigned long) s, (unsigned long) l,
-                               (unsigned long) p, (unsigned long) f,
-                               (unsigned long) d, (unsigned long) o };
-      LSS_REG(2, buf);
-      LSS_BODY(void*, mmap, "0"(__r2));
-    }
-  #elif defined(__x86_64__)
-    /* Need to make sure __off64_t isn't truncated to 32-bits under x32.  */
-    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
-                                    int64_t o) {
-      LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
-                               LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
-                               LSS_SYSCALL_ARG(d), (uint64_t)(o));
-    }
-  #else
-    /* Remaining 64-bit architectures. */
-    LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot,
-                         int, flags, int, fd, int64_t, offset)
-  #endif
-  #if defined(__i386__) || \
-      defined(__PPC__) || \
-      (defined(__arm__) && !defined(__ARM_EABI__)) || \
-      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \
-      defined(__s390__)
-
-    /* See sys_socketcall in net/socket.c in kernel source.
-     * It de-multiplexes on its first arg and unpacks the arglist
-     * array in its second arg.
-     */
-    LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a)
-
-    LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
-      unsigned long args[3] = {
-        (unsigned long) domain,
-        (unsigned long) type,
-        (unsigned long) protocol
-      };
-      return LSS_NAME(socketcall)(1, args);
-    }
-  #elif defined(__ARM_EABI__)
-    LSS_INLINE _syscall3(int, socket,             int,   d,
-                         int,                     t, int,       p)
-  #endif
-  #if defined(__mips__)
-    /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
-     * both file handles through CPU registers.
-     */
-    LSS_INLINE int LSS_NAME(pipe)(int *p) {
-      register unsigned long __v0 __asm__("$2") = __NR_pipe;
-      register unsigned long __v1 __asm__("$3");
-      register unsigned long __r7 __asm__("$7");
-      __asm__ __volatile__ ("syscall\n"
-                            : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
-                            : "0"(__v0)
-                            : "$8", "$9", "$10", "$11", "$12",
-                              "$13", "$14", "$15", "$24", "memory");
-      if (__r7) {
-        LSS_ERRNO = __v0;
-        return -1;
-      } else {
-        p[0] = __v0;
-        p[1] = __v1;
-        return 0;
-      }
-    }
-  #elif defined(__NR_pipe2)
-    LSS_INLINE _syscall2(int,     pipe2,          int *, p,
-                         int,     f                        )
-    LSS_INLINE int LSS_NAME(pipe)( int * p) {
-        return LSS_NAME(pipe2)(p, 0);
-    }
-  #else
-    LSS_INLINE _syscall1(int,     pipe,           int *, p)
-  #endif
-
-  LSS_INLINE pid_t LSS_NAME(gettid)() {
-    pid_t tid = LSS_NAME(_gettid)();
-    if (tid != -1) {
-      return tid;
-    }
-    return LSS_NAME(getpid)();
-  }
-
-  LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
-                                    size_t new_size, int flags, ...) {
-    va_list ap;
-    void *new_address, *rc;
-    va_start(ap, flags);
-    new_address = va_arg(ap, void *);
-    rc = LSS_NAME(_mremap)(old_address, old_size, new_size,
-                           flags, new_address);
-    va_end(ap);
-    return rc;
-  }
-
-  LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
-    /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
-     * then sends job control signals to the real parent, rather than to
-     * the tracer. We reduce the risk of this happening by starting a
-     * whole new time slice, and then quickly sending a SIGCONT signal
-     * right after detaching from the tracee.
-     */
-    int rc, err;
-    LSS_NAME(sched_yield)();
-    rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
-    err = LSS_ERRNO;
-    LSS_NAME(kill)(pid, SIGCONT);
-    LSS_ERRNO = err;
-    return rc;
-  }
-#endif
-
-#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
-}
-#endif
-
-#endif
-#endif
diff --git a/contrib/libtcmalloc/src/base/linuxthreads.cc b/contrib/libtcmalloc/src/base/linuxthreads.cc
deleted file mode 100644
index 891e70c88c4..00000000000
--- a/contrib/libtcmalloc/src/base/linuxthreads.cc
+++ /dev/null
@@ -1,707 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2005-2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#include "base/linuxthreads.h"
-
-#ifdef THREADS
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sched.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <sys/prctl.h>
-#include <semaphore.h>
-
-#include "base/linux_syscall_support.h"
-#include "base/thread_lister.h"
-
-#ifndef CLONE_UNTRACED
-#define CLONE_UNTRACED 0x00800000
-#endif
-
-
-/* Synchronous signals that should not be blocked while in the lister thread.
- */
-static const int sync_signals[]  = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
-                                     SIGXCPU, SIGXFSZ };
-
-/* itoa() is not a standard function, and we cannot safely call printf()
- * after suspending threads. So, we just implement our own copy. A
- * recursive approach is the easiest here.
- */
-static char *local_itoa(char *buf, int i) {
-  if (i < 0) {
-    *buf++ = '-';
-    return local_itoa(buf, -i);
-  } else {
-    if (i >= 10)
-      buf = local_itoa(buf, i/10);
-    *buf++ = (i%10) + '0';
-    *buf   = '\000';
-    return buf;
-  }
-}
-
-
-/* Wrapper around clone() that runs "fn" on the same stack as the
- * caller! Unlike fork(), the cloned thread shares the same address space.
- * The caller must be careful to use only minimal amounts of stack until
- * the cloned thread has returned.
- * There is a good chance that the cloned thread and the caller will share
- * the same copy of errno!
- */
-#ifdef __GNUC__
-#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
-/* Try to force this function into a separate stack frame, and make sure
- * that arguments are passed on the stack.
- */
-static int local_clone (int (*fn)(void *), void *arg, ...)
-  __attribute__ ((noinline));
-#endif
-#endif
-
-/* To avoid the gap cross page boundaries, increase by the large parge
- * size mostly PowerPC system uses.  */
-#ifdef __PPC64__
-#define CLONE_STACK_SIZE 65536
-#else
-#define CLONE_STACK_SIZE 4096
-#endif
-
-static int local_clone (int (*fn)(void *), void *arg, ...) {
-  /* Leave 4kB of gap between the callers stack and the new clone. This
-   * should be more than sufficient for the caller to call waitpid() until
-   * the cloned thread terminates.
-   *
-   * It is important that we set the CLONE_UNTRACED flag, because newer
-   * versions of "gdb" otherwise attempt to attach to our thread, and will
-   * attempt to reap its status codes. This subsequently results in the
-   * caller hanging indefinitely in waitpid(), waiting for a change in
-   * status that will never happen. By setting the CLONE_UNTRACED flag, we
-   * prevent "gdb" from stealing events, but we still expect the thread
-   * lister to fail, because it cannot PTRACE_ATTACH to the process that
-   * is being debugged. This is OK and the error code will be reported
-   * correctly.
-   */
-  return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE,
-                   CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0);
-}
-
-
-/* Local substitute for the atoi() function, which is not necessarily safe
- * to call once threads are suspended (depending on whether libc looks up
- * locale information,  when executing atoi()).
- */
-static int local_atoi(const char *s) {
-  int n   = 0;
-  int neg = *s == '-';
-  if (neg)
-    s++;
-  while (*s >= '0' && *s <= '9')
-    n = 10*n + (*s++ - '0');
-  return neg ? -n : n;
-}
-
-
-/* Re-runs fn until it doesn't cause EINTR
- */
-#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
-
-
-/* Wrap a class around system calls, in order to give us access to
- * a private copy of errno. This only works in C++, but it has the
- * advantage of not needing nested functions, which are a non-standard
- * language extension.
- */
-#ifdef __cplusplus
-namespace {
-  class SysCalls {
-   public:
-    #define SYS_CPLUSPLUS
-    #define SYS_ERRNO     my_errno
-    #define SYS_INLINE    inline
-    #define SYS_PREFIX    -1
-    #undef  SYS_LINUX_SYSCALL_SUPPORT_H
-    #include "linux_syscall_support.h"
-    SysCalls() : my_errno(0) { }
-    int my_errno;
-  };
-}
-#define ERRNO sys.my_errno
-#else
-#define ERRNO my_errno
-#endif
-
-
-/* Wrapper for open() which is guaranteed to never return EINTR.
- */
-static int c_open(const char *fname, int flags, int mode) {
-  ssize_t rc;
-  NO_INTR(rc = sys_open(fname, flags, mode));
-  return rc;
-}
-
-
-/* abort() is not safely reentrant, and changes it's behavior each time
- * it is called. This means, if the main application ever called abort()
- * we cannot safely call it again. This would happen if we were called
- * from a SIGABRT signal handler in the main application. So, document
- * that calling SIGABRT from the thread lister makes it not signal safe
- * (and vice-versa).
- * Also, since we share address space with the main application, we
- * cannot call abort() from the callback and expect the main application
- * to behave correctly afterwards. In fact, the only thing we can do, is
- * to terminate the main application with extreme prejudice (aka
- * PTRACE_KILL).
- * We set up our own SIGABRT handler to do this.
- * In order to find the main application from the signal handler, we
- * need to store information about it in global variables. This is
- * safe, because the main application should be suspended at this
- * time. If the callback ever called TCMalloc_ResumeAllProcessThreads(), then
- * we are running a higher risk, though. So, try to avoid calling
- * abort() after calling TCMalloc_ResumeAllProcessThreads.
- */
-static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker;
-
-
-/* Signal handler to help us recover from dying while we are attached to
- * other threads.
- */
-static void SignalHandler(int signum, siginfo_t *si, void *data) {
-  if (sig_pids != NULL) {
-    if (signum == SIGABRT) {
-      while (sig_num_threads-- > 0) {
-        /* Not sure if sched_yield is really necessary here, but it does not */
-        /* hurt, and it might be necessary for the same reasons that we have */
-        /* to do so in sys_ptrace_detach().                                  */
-        sys_sched_yield();
-        sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0);
-      }
-    } else if (sig_num_threads > 0) {
-      TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids);
-    }
-  }
-  sig_pids = NULL;
-  if (sig_marker >= 0)
-    NO_INTR(sys_close(sig_marker));
-  sig_marker = -1;
-  if (sig_proc >= 0)
-    NO_INTR(sys_close(sig_proc));
-  sig_proc = -1;
-
-  sys__exit(signum == SIGABRT ? 1 : 2);
-}
-
-
-/* Try to dirty the stack, and hope that the compiler is not smart enough
- * to optimize this function away. Or worse, the compiler could inline the
- * function and permanently allocate the data on the stack.
- */
-static void DirtyStack(size_t amount) {
-  char buf[amount];
-  memset(buf, 0, amount);
-  sys_read(-1, buf, amount);
-}
-
-
-/* Data structure for passing arguments to the lister thread.
- */
-#define ALT_STACKSIZE (MINSIGSTKSZ + 4096)
-
-struct ListerParams {
-  int         result, err;
-  char        *altstack_mem;
-  ListAllProcessThreadsCallBack callback;
-  void        *parameter;
-  va_list     ap;
-  sem_t       *lock;
-};
-
-
-static void ListerThread(struct ListerParams *args) {
-  int                found_parent = 0;
-  pid_t              clone_pid  = sys_gettid(), ppid = sys_getppid();
-  char               proc_self_task[80], marker_name[48], *marker_path;
-  const char         *proc_paths[3];
-  const char *const  *proc_path = proc_paths;
-  int                proc = -1, marker = -1, num_threads = 0;
-  int                max_threads = 0, sig;
-  struct kernel_stat marker_sb, proc_sb;
-  stack_t            altstack;
-
-  /* Wait for parent thread to set appropriate permissions
-   * to allow ptrace activity
-   */
-  if (sem_wait(args->lock) < 0) {
-    goto failure;
-  }
-
-  /* Create "marker" that we can use to detect threads sharing the same
-   * address space and the same file handles. By setting the FD_CLOEXEC flag
-   * we minimize the risk of misidentifying child processes as threads;
-   * and since there is still a race condition,  we will filter those out
-   * later, anyway.
-   */
-  if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 ||
-      sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) {
-  failure:
-    args->result = -1;
-    args->err    = errno;
-    if (marker >= 0)
-      NO_INTR(sys_close(marker));
-    sig_marker = marker = -1;
-    if (proc >= 0)
-      NO_INTR(sys_close(proc));
-    sig_proc = proc = -1;
-    sys__exit(1);
-  }
-
-  /* Compute search paths for finding thread directories in /proc            */
-  local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
-  strcpy(marker_name, proc_self_task);
-  marker_path = marker_name + strlen(marker_name);
-  strcat(proc_self_task, "/task/");
-  proc_paths[0] = proc_self_task; /* /proc/$$/task/                          */
-  proc_paths[1] = "/proc/";       /* /proc/                                  */
-  proc_paths[2] = NULL;
-
-  /* Compute path for marker socket in /proc                                 */
-  local_itoa(strcpy(marker_path, "/fd/") + 4, marker);
-  if (sys_stat(marker_name, &marker_sb) < 0) {
-    goto failure;
-  }
-
-  /* Catch signals on an alternate pre-allocated stack. This way, we can
-   * safely execute the signal handler even if we ran out of memory.
-   */
-  memset(&altstack, 0, sizeof(altstack));
-  altstack.ss_sp    = args->altstack_mem;
-  altstack.ss_flags = 0;
-  altstack.ss_size  = ALT_STACKSIZE;
-  sys_sigaltstack(&altstack, (const stack_t *)NULL);
-
-  /* Some kernels forget to wake up traced processes, when the
-   * tracer dies.  So, intercept synchronous signals and make sure
-   * that we wake up our tracees before dying. It is the caller's
-   * responsibility to ensure that asynchronous signals do not
-   * interfere with this function.
-   */
-  sig_marker = marker;
-  sig_proc   = -1;
-  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
-    struct kernel_sigaction sa;
-    memset(&sa, 0, sizeof(sa));
-    sa.sa_sigaction_ = SignalHandler;
-    sys_sigfillset(&sa.sa_mask);
-    sa.sa_flags      = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND;
-    sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL);
-  }
-  
-  /* Read process directories in /proc/...                                   */
-  for (;;) {
-    /* Some kernels know about threads, and hide them in "/proc"
-     * (although they are still there, if you know the process
-     * id). Threads are moved into a separate "task" directory. We
-     * check there first, and then fall back on the older naming
-     * convention if necessary.
-     */
-    if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) {
-      if (*++proc_path != NULL)
-        continue;
-      goto failure;
-    }
-    if (sys_fstat(proc, &proc_sb) < 0)
-      goto failure;
-    
-    /* Since we are suspending threads, we cannot call any libc
-     * functions that might acquire locks. Most notably, we cannot
-     * call malloc(). So, we have to allocate memory on the stack,
-     * instead. Since we do not know how much memory we need, we
-     * make a best guess. And if we guessed incorrectly we retry on
-     * a second iteration (by jumping to "detach_threads").
-     *
-     * Unless the number of threads is increasing very rapidly, we
-     * should never need to do so, though, as our guestimate is very
-     * conservative.
-     */
-    if (max_threads < proc_sb.st_nlink + 100)
-      max_threads = proc_sb.st_nlink + 100;
-    
-    /* scope */ {
-      pid_t pids[max_threads];
-      int   added_entries = 0;
-      sig_num_threads     = num_threads;
-      sig_pids            = pids;
-      for (;;) {
-        struct KERNEL_DIRENT *entry;
-        char buf[4096];
-        ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf,
-                                         sizeof(buf));
-        if (nbytes < 0)
-          goto failure;
-        else if (nbytes == 0) {
-          if (added_entries) {
-            /* Need to keep iterating over "/proc" in multiple
-             * passes until we no longer find any more threads. This
-             * algorithm eventually completes, when all threads have
-             * been suspended.
-             */
-            added_entries = 0;
-            sys_lseek(proc, 0, SEEK_SET);
-            continue;
-          }
-          break;
-        }
-        for (entry = (struct KERNEL_DIRENT *)buf;
-             entry < (struct KERNEL_DIRENT *)&buf[nbytes];
-             entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) {
-          if (entry->d_ino != 0) {
-            const char *ptr = entry->d_name;
-            pid_t pid;
-            
-            /* Some kernels hide threads by preceding the pid with a '.'     */
-            if (*ptr == '.')
-              ptr++;
-            
-            /* If the directory is not numeric, it cannot be a
-             * process/thread
-             */
-            if (*ptr < '0' || *ptr > '9')
-              continue;
-            pid = local_atoi(ptr);
-
-            /* Attach (and suspend) all threads                              */
-            if (pid && pid != clone_pid) {
-              struct kernel_stat tmp_sb;
-              char fname[entry->d_reclen + 48];
-              strcat(strcat(strcpy(fname, "/proc/"),
-                            entry->d_name), marker_path);
-              
-              /* Check if the marker is identical to the one we created      */
-              if (sys_stat(fname, &tmp_sb) >= 0 &&
-                  marker_sb.st_ino == tmp_sb.st_ino) {
-                long i, j;
-
-                /* Found one of our threads, make sure it is no duplicate    */
-                for (i = 0; i < num_threads; i++) {
-                  /* Linear search is slow, but should not matter much for
-                   * the typically small number of threads.
-                   */
-                  if (pids[i] == pid) {
-                    /* Found a duplicate; most likely on second pass         */
-                    goto next_entry;
-                  }
-                }
-                
-                /* Check whether data structure needs growing                */
-                if (num_threads >= max_threads) {
-                  /* Back to square one, this time with more memory          */
-                  NO_INTR(sys_close(proc));
-                  goto detach_threads;
-                }
-
-                /* Attaching to thread suspends it                           */
-                pids[num_threads++] = pid;
-                sig_num_threads     = num_threads;
-                if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0,
-                               (void *)0) < 0) {
-                  /* If operation failed, ignore thread. Maybe it
-                   * just died?  There might also be a race
-                   * condition with a concurrent core dumper or
-                   * with a debugger. In that case, we will just
-                   * make a best effort, rather than failing
-                   * entirely.
-                   */
-                  num_threads--;
-                  sig_num_threads = num_threads;
-                  goto next_entry;
-                }
-                while (sys_waitpid(pid, (int *)0, __WALL) < 0) {
-                  if (errno != EINTR) {
-                    sys_ptrace_detach(pid);
-                    num_threads--;
-                    sig_num_threads = num_threads;
-                    goto next_entry;
-                  }
-                }
-
-                if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
-                    sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i   != j) {
-                  /* Address spaces are distinct, even though both
-                   * processes show the "marker". This is probably
-                   * a forked child process rather than a thread.
-                   */
-                  sys_ptrace_detach(pid);
-                  num_threads--;
-                  sig_num_threads = num_threads;
-                } else {
-                  found_parent |= pid == ppid;
-                  added_entries++;
-                }
-              }
-            }
-          }
-        next_entry:;
-        }
-      }
-      NO_INTR(sys_close(proc));
-      sig_proc = proc = -1;
-
-      /* If we failed to find any threads, try looking somewhere else in
-       * /proc. Maybe, threads are reported differently on this system.
-       */
-      if (num_threads > 1 || !*++proc_path) {
-        NO_INTR(sys_close(marker));
-        sig_marker = marker = -1;
-
-        /* If we never found the parent process, something is very wrong.
-         * Most likely, we are running in debugger. Any attempt to operate
-         * on the threads would be very incomplete. Let's just report an
-         * error to the caller.
-         */
-        if (!found_parent) {
-          TCMalloc_ResumeAllProcessThreads(num_threads, pids);
-          sys__exit(3);
-        }
-
-        /* Now we are ready to call the callback,
-         * which takes care of resuming the threads for us.
-         */
-        args->result = args->callback(args->parameter, num_threads,
-                                      pids, args->ap);
-        args->err = errno;
-
-        /* Callback should have resumed threads, but better safe than sorry  */
-        if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) {
-          /* Callback forgot to resume at least one thread, report error     */
-          args->err    = EINVAL;
-          args->result = -1;
-        }
-
-        sys__exit(0);
-      }
-    detach_threads:
-      /* Resume all threads prior to retrying the operation                  */
-      TCMalloc_ResumeAllProcessThreads(num_threads, pids);
-      sig_pids = NULL;
-      num_threads = 0;
-      sig_num_threads = num_threads;
-      max_threads += 100;
-    }
-  }
-}
-
-
-/* This function gets the list of all linux threads of the current process
- * passes them to the 'callback' along with the 'parameter' pointer; at the
- * call back call time all the threads are paused via
- * PTRACE_ATTACH.
- * The callback is executed from a separate thread which shares only the
- * address space, the filesystem, and the filehandles with the caller. Most
- * notably, it does not share the same pid and ppid; and if it terminates,
- * the rest of the application is still there. 'callback' is supposed to do
- * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if
- * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous
- * signals are blocked. If the 'callback' decides to unblock them, it must
- * ensure that they cannot terminate the application, or that
- * TCMalloc_ResumeAllProcessThreads will get called.
- * It is an error for the 'callback' to make any library calls that could
- * acquire locks. Most notably, this means that most system calls have to
- * avoid going through libc. Also, this means that it is not legal to call
- * exit() or abort().
- * We return -1 on error and the return value of 'callback' on success.
- */
-int TCMalloc_ListAllProcessThreads(void *parameter,
-                                   ListAllProcessThreadsCallBack callback, ...) {
-  char                   altstack_mem[ALT_STACKSIZE];
-  struct ListerParams    args;
-  pid_t                  clone_pid;
-  int                    dumpable = 1, sig;
-  struct kernel_sigset_t sig_blocked, sig_old;
-  sem_t                  lock;
-
-  va_start(args.ap, callback);
-
-  /* If we are short on virtual memory, initializing the alternate stack
-   * might trigger a SIGSEGV. Let's do this early, before it could get us
-   * into more trouble (i.e. before signal handlers try to use the alternate
-   * stack, and before we attach to other threads).
-   */
-  memset(altstack_mem, 0, sizeof(altstack_mem));
-
-  /* Some of our cleanup functions could conceivable use more stack space.
-   * Try to touch the stack right now. This could be defeated by the compiler
-   * being too smart for it's own good, so try really hard.
-   */
-  DirtyStack(32768);
-
-  /* Make this process "dumpable". This is necessary in order to ptrace()
-   * after having called setuid().
-   */
-  dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
-  if (!dumpable)
-    sys_prctl(PR_SET_DUMPABLE, 1);
-
-  /* Fill in argument block for dumper thread                                */
-  args.result       = -1;
-  args.err          = 0;
-  args.altstack_mem = altstack_mem;
-  args.parameter    = parameter;
-  args.callback     = callback;
-  args.lock         = &lock;
-
-  /* Before cloning the thread lister, block all asynchronous signals, as we */
-  /* are not prepared to handle them.                                        */
-  sys_sigfillset(&sig_blocked);
-  for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
-    sys_sigdelset(&sig_blocked, sync_signals[sig]);
-  }
-  if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
-    args.err = errno;
-    args.result = -1;
-    goto failed;
-  }
-
-  /* scope */ {
-    /* After cloning, both the parent and the child share the same instance
-     * of errno. We must make sure that at least one of these processes
-     * (in our case, the parent) uses modified syscall macros that update
-     * a local copy of errno, instead.
-     */
-    #ifdef __cplusplus
-      #define sys0_sigprocmask sys.sigprocmask
-      #define sys0_waitpid     sys.waitpid
-      SysCalls sys;
-    #else
-      int my_errno;
-      #define SYS_ERRNO        my_errno
-      #define SYS_INLINE       inline
-      #define SYS_PREFIX       0
-      #undef  SYS_LINUX_SYSCALL_SUPPORT_H
-      #include "linux_syscall_support.h"
-    #endif
-
-    /* Lock before clone so that parent can set
-	 * ptrace permissions (if necessary) prior
-     * to ListerThread actually executing
-     */
-    if (sem_init(&lock, 0, 0) == 0) {
-
-      int clone_errno;
-      clone_pid = local_clone((int (*)(void *))ListerThread, &args);
-      clone_errno = errno;
-
-      sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
-
-      if (clone_pid >= 0) {
-#ifdef PR_SET_PTRACER
-        /* In newer versions of glibc permission must explicitly
-         * be given to allow for ptrace.
-         */
-        prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0);
-#endif
-        /* Releasing the lock here allows the
-         * ListerThread to execute and ptrace us.
-		 */
-        sem_post(&lock);
-        int status, rc;
-        while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
-               ERRNO == EINTR) {
-                /* Keep waiting                                                 */
-        }
-        if (rc < 0) {
-          args.err = ERRNO;
-          args.result = -1;
-        } else if (WIFEXITED(status)) {
-          switch (WEXITSTATUS(status)) {
-            case 0: break;             /* Normal process termination           */
-            case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected   */
-                    args.result = -1;
-                    break;
-            case 3: args.err = EPERM;  /* Process is already being traced      */
-                    args.result = -1;
-                    break;
-            default:args.err = ECHILD; /* Child died unexpectedly              */
-                    args.result = -1;
-                    break;
-          }
-        } else if (!WIFEXITED(status)) {
-          args.err    = EFAULT;        /* Terminated due to an unhandled signal*/
-          args.result = -1;
-        }
-        sem_destroy(&lock);
-      } else {
-        args.result = -1;
-        args.err    = clone_errno;
-      }
-    } else {
-      args.result = -1;
-      args.err    = errno;
-    }
-  }
-
-  /* Restore the "dumpable" state of the process                             */
-failed:
-  if (!dumpable)
-    sys_prctl(PR_SET_DUMPABLE, dumpable);
-
-  va_end(args.ap);
-
-  errno = args.err;
-  return args.result;
-}
-
-/* This function resumes the list of all linux threads that
- * TCMalloc_ListAllProcessThreads pauses before giving to its callback.
- * The function returns non-zero if at least one thread was
- * suspended and has now been resumed.
- */
-int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
-  int detached_at_least_one = 0;
-  while (num_threads-- > 0) {
-    detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0;
-  }
-  return detached_at_least_one;
-}
-
-#ifdef __cplusplus
-}
-#endif
-#endif
diff --git a/contrib/libtcmalloc/src/base/linuxthreads.h b/contrib/libtcmalloc/src/base/linuxthreads.h
deleted file mode 100644
index 09ce45fc13f..00000000000
--- a/contrib/libtcmalloc/src/base/linuxthreads.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* Copyright (c) 2005-2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#ifndef _LINUXTHREADS_H
-#define _LINUXTHREADS_H
-
-/* Include thread_lister.h to get the interface that we implement for linux.
- */
-
-/* We currently only support certain platforms on Linux. Porting to other
- * related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-     defined(__mips__) || defined(__PPC__) || defined(__aarch64__) ||       \
-     defined(__s390__)) && defined(__linux)
-
-/* Define the THREADS symbol to make sure that there is exactly one core dumper
- * built into the library.
- */
-#define THREADS "Linux /proc"
-
-#endif
-
-#endif  /* _LINUXTHREADS_H */
diff --git a/contrib/libtcmalloc/src/base/logging.cc b/contrib/libtcmalloc/src/base/logging.cc
deleted file mode 100644
index 2b0adcb8945..00000000000
--- a/contrib/libtcmalloc/src/base/logging.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2007, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file just provides storage for FLAGS_verbose.
-
-#include "../config.h"
-#include "base/logging.h"
-#include "base/commandlineflags.h"
-
-DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0),
-             "Set to numbers >0 for more verbose output, or <0 for less.  "
-             "--verbose == -4 means we log fatal errors only.");
-
-
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-
-// While windows does have a POSIX-compatible API
-// (_open/_write/_close), it acquires memory.  Using this lower-level
-// windows API is the closest we can get to being "raw".
-RawFD RawOpenForWriting(const char* filename) {
-  // CreateFile allocates memory if file_name isn't absolute, so if
-  // that ever becomes a problem then we ought to compute the absolute
-  // path on its behalf (perhaps the ntdll/kernel function isn't aware
-  // of the working directory?)
-  RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL,
-                         CREATE_ALWAYS, 0, NULL);
-  if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS)
-    SetEndOfFile(fd);    // truncate the existing file
-  return fd;
-}
-
-void RawWrite(RawFD handle, const char* buf, size_t len) {
-  while (len > 0) {
-    DWORD wrote;
-    BOOL ok = WriteFile(handle, buf, len, &wrote, NULL);
-    // We do not use an asynchronous file handle, so ok==false means an error
-    if (!ok) break;
-    buf += wrote;
-    len -= wrote;
-  }
-}
-
-void RawClose(RawFD handle) {
-  CloseHandle(handle);
-}
-
-#else  // _WIN32 || __CYGWIN__ || __CYGWIN32__
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-
-// Re-run fn until it doesn't cause EINTR.
-#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
-
-RawFD RawOpenForWriting(const char* filename) {
-  return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664);
-}
-
-void RawWrite(RawFD fd, const char* buf, size_t len) {
-  while (len > 0) {
-    ssize_t r;
-    NO_INTR(r = write(fd, buf, len));
-    if (r <= 0) break;
-    buf += r;
-    len -= r;
-  }
-}
-
-void RawClose(RawFD fd) {
-  NO_INTR(close(fd));
-}
-
-#endif  // _WIN32 || __CYGWIN__ || __CYGWIN32__
diff --git a/contrib/libtcmalloc/src/base/logging.h b/contrib/libtcmalloc/src/base/logging.h
deleted file mode 100644
index fa22489bea3..00000000000
--- a/contrib/libtcmalloc/src/base/logging.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file contains #include information about logging-related stuff.
-// Pretty much everybody needs to #include this file so that they can
-// log various happenings.
-//
-#ifndef _LOGGING_H_
-#define _LOGGING_H_
-
-#include "../config.h"
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>    // for write()
-#endif
-#include <string.h>    // for strlen(), strcmp()
-#include <assert.h>
-#include <errno.h>     // for errno
-#include "base/commandlineflags.h"
-
-// On some systems (like freebsd), we can't call write() at all in a
-// global constructor, perhaps because errno hasn't been set up.
-// (In windows, we can't call it because it might call malloc.)
-// Calling the write syscall is safer (it doesn't set errno), so we
-// prefer that.  Note we don't care about errno for logging: we just
-// do logging on a best-effort basis.
-#if defined(_MSC_VER)
-#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len);  // in port.cc
-#elif defined(HAVE_SYS_SYSCALL_H)
-#include <sys/syscall.h>
-#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len)
-#else
-#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
-#endif
-
-// MSVC and mingw define their own, safe version of vnsprintf (the
-// windows one in broken) in port.cc.  Everyone else can use the
-// version here.  We had to give it a unique name for windows.
-#ifndef _WIN32
-# define perftools_vsnprintf vsnprintf
-#endif
-
-
-// We log all messages at this log-level and below.
-// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4
-DECLARE_int32(verbose);
-
-// CHECK dies with a fatal error if condition is not true.  It is *not*
-// controlled by NDEBUG, so the check will be executed regardless of
-// compilation mode.  Therefore, it is safe to do things like:
-//    CHECK(fp->Write(x) == 4)
-// Note we use write instead of printf/puts to avoid the risk we'll
-// call malloc().
-#define CHECK(condition)                                                \
-  do {                                                                  \
-    if (!(condition)) {                                                 \
-      WRITE_TO_STDERR("Check failed: " #condition "\n",                 \
-                      sizeof("Check failed: " #condition "\n")-1);      \
-      abort();                                                          \
-    }                                                                   \
-  } while (0)
-
-// This takes a message to print.  The name is historical.
-#define RAW_CHECK(condition, message)                                          \
-  do {                                                                         \
-    if (!(condition)) {                                                        \
-      WRITE_TO_STDERR("Check failed: " #condition ": " message "\n",           \
-                      sizeof("Check failed: " #condition ": " message "\n")-1);\
-      abort();                                                                 \
-    }                                                                          \
-  } while (0)
-
-// This is like RAW_CHECK, but only in debug-mode
-#ifdef NDEBUG
-enum { DEBUG_MODE = 0 };
-#define RAW_DCHECK(condition, message)
-#else
-enum { DEBUG_MODE = 1 };
-#define RAW_DCHECK(condition, message)  RAW_CHECK(condition, message)
-#endif
-
-// This prints errno as well.  Note we use write instead of printf/puts to
-// avoid the risk we'll call malloc().
-#define PCHECK(condition)                                               \
-  do {                                                                  \
-    if (!(condition)) {                                                 \
-      const int err_no = errno;                                         \
-      WRITE_TO_STDERR("Check failed: " #condition ": ",                 \
-                      sizeof("Check failed: " #condition ": ")-1);      \
-      WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no)));      \
-      WRITE_TO_STDERR("\n", sizeof("\n")-1);                            \
-      abort();                                                          \
-    }                                                                   \
-  } while (0)
-
-// Helper macro for binary operators; prints the two values on error
-// Don't use this macro directly in your code, use CHECK_EQ et al below
-
-// WARNING: These don't compile correctly if one of the arguments is a pointer
-// and the other is NULL. To work around this, simply static_cast NULL to the
-// type of the desired pointer.
-
-// TODO(jandrews): Also print the values in case of failure.  Requires some
-// sort of type-sensitive ToString() function.
-#define CHECK_OP(op, val1, val2)                                        \
-  do {                                                                  \
-    if (!((val1) op (val2))) {                                          \
-      fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2);   \
-      abort();                                                          \
-    }                                                                   \
-  } while (0)
-
-#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
-#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
-#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
-#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
-#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
-#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)
-
-// Synonyms for CHECK_* that are used in some unittests.
-#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
-#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
-#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
-#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
-#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
-#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
-#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
-#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
-#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
-#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
-#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
-#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)
-// As are these variants.
-#define EXPECT_TRUE(cond)     CHECK(cond)
-#define EXPECT_FALSE(cond)    CHECK(!(cond))
-#define EXPECT_STREQ(a, b)    CHECK(strcmp(a, b) == 0)
-#define ASSERT_TRUE(cond)     EXPECT_TRUE(cond)
-#define ASSERT_FALSE(cond)    EXPECT_FALSE(cond)
-#define ASSERT_STREQ(a, b)    EXPECT_STREQ(a, b)
-
-// Used for (libc) functions that return -1 and set errno
-#define CHECK_ERR(invocation)  PCHECK((invocation) != -1)
-
-// A few more checks that only happen in debug mode
-#ifdef NDEBUG
-#define DCHECK_EQ(val1, val2)
-#define DCHECK_NE(val1, val2)
-#define DCHECK_LE(val1, val2)
-#define DCHECK_LT(val1, val2)
-#define DCHECK_GE(val1, val2)
-#define DCHECK_GT(val1, val2)
-#else
-#define DCHECK_EQ(val1, val2)  CHECK_EQ(val1, val2)
-#define DCHECK_NE(val1, val2)  CHECK_NE(val1, val2)
-#define DCHECK_LE(val1, val2)  CHECK_LE(val1, val2)
-#define DCHECK_LT(val1, val2)  CHECK_LT(val1, val2)
-#define DCHECK_GE(val1, val2)  CHECK_GE(val1, val2)
-#define DCHECK_GT(val1, val2)  CHECK_GT(val1, val2)
-#endif
-
-
-#ifdef ERROR
-#undef ERROR      // may conflict with ERROR macro on windows
-#endif
-enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4};
-
-// NOTE: we add a newline to the end of the output if it's not there already
-inline void LogPrintf(int severity, const char* pat, va_list ap) {
-  // We write directly to the stderr file descriptor and avoid FILE
-  // buffering because that may invoke malloc()
-  char buf[600];
-  perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap);
-  if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') {
-    assert(strlen(buf)+1 < sizeof(buf));
-    strcat(buf, "\n");
-  }
-  WRITE_TO_STDERR(buf, strlen(buf));
-  if ((severity) == FATAL)
-    abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls
-}
-
-// Note that since the order of global constructors is unspecified,
-// global code that calls RAW_LOG may execute before FLAGS_verbose is set.
-// Such code will run with verbosity == 0 no matter what.
-#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity)
-
-// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it.
-#define LOG_PRINTF(severity, pat) do {          \
-  if (VLOG_IS_ON(severity)) {                   \
-    va_list ap;                                 \
-    va_start(ap, pat);                          \
-    LogPrintf(severity, pat, ap);               \
-    va_end(ap);                                 \
-  }                                             \
-} while (0)
-
-// RAW_LOG is the main function; some synonyms are used in unittests.
-inline void RAW_LOG(int lvl, const char* pat, ...)  { LOG_PRINTF(lvl, pat); }
-inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); }
-inline void LOG(int lvl, const char* pat, ...)      { LOG_PRINTF(lvl, pat); }
-inline void VLOG(int lvl, const char* pat, ...)     { LOG_PRINTF(lvl, pat); }
-inline void LOG_IF(int lvl, bool cond, const char* pat, ...) {
-  if (cond)  LOG_PRINTF(lvl, pat);
-}
-
-// This isn't technically logging, but it's also IO and also is an
-// attempt to be "raw" -- that is, to not use any higher-level libc
-// routines that might allocate memory or (ideally) try to allocate
-// locks.  We use an opaque file handle (not necessarily an int)
-// to allow even more low-level stuff in the future.
-// Like other "raw" routines, these functions are best effort, and
-// thus don't return error codes (except RawOpenForWriting()).
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-#ifndef NOMINMAX
-#define NOMINMAX     // @#!$& windows
-#endif
-#include <windows.h>
-typedef HANDLE RawFD;
-const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE;
-#else
-typedef int RawFD;
-const RawFD kIllegalRawFD = -1;   // what open returns if it fails
-#endif  // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-
-RawFD RawOpenForWriting(const char* filename);   // uses default permissions
-void RawWrite(RawFD fd, const char* buf, size_t len);
-void RawClose(RawFD fd);
-
-#endif // _LOGGING_H_
diff --git a/contrib/libtcmalloc/src/base/low_level_alloc.cc b/contrib/libtcmalloc/src/base/low_level_alloc.cc
deleted file mode 100644
index 6b467cff123..00000000000
--- a/contrib/libtcmalloc/src/base/low_level_alloc.cc
+++ /dev/null
@@ -1,582 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// A low-level allocator that can be used by other low-level
-// modules without introducing dependency cycles.
-// This allocator is slow and wasteful of memory;
-// it should not be used when performance is key.
-
-#include "base/low_level_alloc.h"
-#include "base/dynamic_annotations.h"
-#include "base/spinlock.h"
-#include "base/logging.h"
-#include "malloc_hook-inl.h"
-#include <gperftools/malloc_hook.h>
-#include <errno.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#include <new>                   // for placement-new
-
-// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
-// form of the name instead.
-#ifndef MAP_ANONYMOUS
-# define MAP_ANONYMOUS MAP_ANON
-#endif
-
-// A first-fit allocator with amortized logarithmic free() time.
-
-LowLevelAlloc::PagesAllocator::~PagesAllocator() {
-}
-
-// ---------------------------------------------------------------------------
-static const int kMaxLevel = 30;
-
-// We put this class-only struct in a namespace to avoid polluting the
-// global namespace with this struct name (thus risking an ODR violation).
-namespace low_level_alloc_internal {
-  // This struct describes one allocated block, or one free block.
-  struct AllocList {
-    struct Header {
-      intptr_t size;  // size of entire region, including this field. Must be
-                      // first.  Valid in both allocated and unallocated blocks
-      intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this
-      LowLevelAlloc::Arena *arena; // pointer to parent arena
-      void *dummy_for_alignment;   // aligns regions to 0 mod 2*sizeof(void*)
-    } header;
-
-    // Next two fields: in unallocated blocks: freelist skiplist data
-    //                  in allocated blocks: overlaps with client data
-    int levels;           // levels in skiplist used
-    AllocList *next[kMaxLevel];   // actually has levels elements.
-                                  // The AllocList node may not have room for
-                                  // all kMaxLevel entries.  See max_fit in
-                                  // LLA_SkiplistLevels()
-  };
-}
-using low_level_alloc_internal::AllocList;
-
-
-// ---------------------------------------------------------------------------
-// A trivial skiplist implementation.  This is used to keep the freelist
-// in address order while taking only logarithmic time per insert and delete.
-
-// An integer approximation of log2(size/base)
-// Requires size >= base.
-static int IntLog2(size_t size, size_t base) {
-  int result = 0;
-  for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result)
-    result++;
-  }
-  //    floor(size / 2**result) <= base < floor(size / 2**(result-1))
-  // =>     log2(size/(base+1)) <= result < 1+log2(size/base)
-  // => result ~= log2(size/base)
-  return result;
-}
-
-// Return a random integer n:  p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1.
-static int Random() {
-  static uint32 r = 1;         // no locking---it's not critical
-  ANNOTATE_BENIGN_RACE(&r, "benign race, not critical.");
-  int result = 1;
-  while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) {
-    result++;
-  }
-  return result;
-}
-
-// Return a number of skiplist levels for a node of size bytes, where
-// base is the minimum node size.  Compute level=log2(size / base)+n
-// where n is 1 if random is false and otherwise a random number generated with
-// the standard distribution for a skiplist:  See Random() above.
-// Bigger nodes tend to have more skiplist levels due to the log2(size / base)
-// term, so first-fit searches touch fewer nodes.  "level" is clipped so
-// level<kMaxLevel and next[level-1] will fit in the node.
-// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel
-static int LLA_SkiplistLevels(size_t size, size_t base, bool random) {
-  // max_fit is the maximum number of levels that will fit in a node for the
-  // given size.   We can't return more than max_fit, no matter what the
-  // random number generator says.
-  int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *);
-  int level = IntLog2(size, base) + (random? Random() : 1);
-  if (level > max_fit)     level = max_fit;
-  if (level > kMaxLevel-1) level = kMaxLevel - 1;
-  RAW_CHECK(level >= 1, "block not big enough for even one level");
-  return level;
-}
-
-// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e.
-// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater
-// points to the last element at level i in the AllocList less than *e, or is
-// head if no such element exists.
-static AllocList *LLA_SkiplistSearch(AllocList *head,
-                                     AllocList *e, AllocList **prev) {
-  AllocList *p = head;
-  for (int level = head->levels - 1; level >= 0; level--) {
-    for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) {
-    }
-    prev[level] = p;
-  }
-  return (head->levels == 0) ?  0 : prev[0]->next[0];
-}
-
-// Insert element *e into AllocList *head.  Set prev[] as LLA_SkiplistSearch.
-// Requires that e->levels be previously set by the caller (using
-// LLA_SkiplistLevels())
-static void LLA_SkiplistInsert(AllocList *head, AllocList *e,
-                               AllocList **prev) {
-  LLA_SkiplistSearch(head, e, prev);
-  for (; head->levels < e->levels; head->levels++) { // extend prev pointers
-    prev[head->levels] = head;                       // to all *e's levels
-  }
-  for (int i = 0; i != e->levels; i++) { // add element to list
-    e->next[i] = prev[i]->next[i];
-    prev[i]->next[i] = e;
-  }
-}
-
-// Remove element *e from AllocList *head.  Set prev[] as LLA_SkiplistSearch().
-// Requires that e->levels be previous set by the caller (using
-// LLA_SkiplistLevels())
-static void LLA_SkiplistDelete(AllocList *head, AllocList *e,
-                               AllocList **prev) {
-  AllocList *found = LLA_SkiplistSearch(head, e, prev);
-  RAW_CHECK(e == found, "element not in freelist");
-  for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) {
-    prev[i]->next[i] = e->next[i];
-  }
-  while (head->levels > 0 && head->next[head->levels - 1] == 0) {
-    head->levels--;   // reduce head->levels if level unused
-  }
-}
-
-// ---------------------------------------------------------------------------
-// Arena implementation
-
-struct LowLevelAlloc::Arena {
-  Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init
-  explicit Arena(int) : pagesize(0) {}  // set pagesize to zero explicitly
-                                        // for non-static init
-
-  SpinLock mu;            // protects freelist, allocation_count,
-                          // pagesize, roundup, min_size
-  AllocList freelist;     // head of free list; sorted by addr (under mu)
-  int32 allocation_count; // count of allocated blocks (under mu)
-  int32 flags;            // flags passed to NewArena (ro after init)
-  size_t pagesize;        // ==getpagesize()  (init under mu, then ro)
-  size_t roundup;         // lowest power of 2 >= max(16,sizeof (AllocList))
-                          // (init under mu, then ro)
-  size_t min_size;        // smallest allocation block size
-                          // (init under mu, then ro)
-  PagesAllocator *allocator;
-};
-
-// The default arena, which is used when 0 is passed instead of an Arena
-// pointer.
-static struct LowLevelAlloc::Arena default_arena;
-
-// Non-malloc-hooked arenas: used only to allocate metadata for arenas that
-// do not want malloc hook reporting, so that for them there's no malloc hook
-// reporting even during arena creation.
-static struct LowLevelAlloc::Arena unhooked_arena;
-static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena;
-
-namespace {
-
-  class DefaultPagesAllocator : public LowLevelAlloc::PagesAllocator {
-  public:
-    virtual ~DefaultPagesAllocator() {};
-    virtual void *MapPages(int32 flags, size_t size);
-    virtual void UnMapPages(int32 flags, void *addr, size_t size);
-  };
-
-}
-
-// magic numbers to identify allocated and unallocated blocks
-static const intptr_t kMagicAllocated = 0x4c833e95;
-static const intptr_t kMagicUnallocated = ~kMagicAllocated;
-
-namespace {
-  class SCOPED_LOCKABLE ArenaLock {
-   public:
-    explicit ArenaLock(LowLevelAlloc::Arena *arena)
-        EXCLUSIVE_LOCK_FUNCTION(arena->mu)
-        : left_(false), mask_valid_(false), arena_(arena) {
-      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
-      // We've decided not to support async-signal-safe arena use until
-      // there a demonstrated need.  Here's how one could do it though
-      // (would need to be made more portable).
-#if 0
-        sigset_t all;
-        sigfillset(&all);
-        this->mask_valid_ =
-            (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
-#else
-        RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
-#endif
-      }
-      this->arena_->mu.Lock();
-    }
-    ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
-    void Leave() /*UNLOCK_FUNCTION()*/ {
-      this->arena_->mu.Unlock();
-#if 0
-      if (this->mask_valid_) {
-        pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
-      }
-#endif
-      this->left_ = true;
-    }
-   private:
-    bool left_;       // whether left region
-    bool mask_valid_;
-#if 0
-    sigset_t mask_;   // old mask of blocked signals
-#endif
-    LowLevelAlloc::Arena *arena_;
-    DISALLOW_COPY_AND_ASSIGN(ArenaLock);
-  };
-} // anonymous namespace
-
-// create an appropriate magic number for an object at "ptr"
-// "magic" should be kMagicAllocated or kMagicUnallocated
-inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
-  return magic ^ reinterpret_cast<intptr_t>(ptr);
-}
-
-// Initialize the fields of an Arena
-static void ArenaInit(LowLevelAlloc::Arena *arena) {
-  if (arena->pagesize == 0) {
-    arena->pagesize = getpagesize();
-    // Round up block sizes to a power of two close to the header size.
-    arena->roundup = 16;
-    while (arena->roundup < sizeof (arena->freelist.header)) {
-      arena->roundup += arena->roundup;
-    }
-    // Don't allocate blocks less than twice the roundup size to avoid tiny
-    // free blocks.
-    arena->min_size = 2 * arena->roundup;
-    arena->freelist.header.size = 0;
-    arena->freelist.header.magic =
-        Magic(kMagicUnallocated, &arena->freelist.header);
-    arena->freelist.header.arena = arena;
-    arena->freelist.levels = 0;
-    memset(arena->freelist.next, 0, sizeof (arena->freelist.next));
-    arena->allocation_count = 0;
-    if (arena == &default_arena) {
-      // Default arena should be hooked, e.g. for heap-checker to trace
-      // pointer chains through objects in the default arena.
-      arena->flags = LowLevelAlloc::kCallMallocHook;
-    } else if (arena == &unhooked_async_sig_safe_arena) {
-      arena->flags = LowLevelAlloc::kAsyncSignalSafe;
-    } else {
-      arena->flags = 0;   // other arenas' flags may be overridden by client,
-                          // but unhooked_arena will have 0 in 'flags'.
-    }
-    arena->allocator = LowLevelAlloc::GetDefaultPagesAllocator();
-  }
-}
-
-// L < meta_data_arena->mu
-LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
-                                              Arena *meta_data_arena) {
-  return NewArenaWithCustomAlloc(flags, meta_data_arena, NULL);
-}
-
-// L < meta_data_arena->mu
-LowLevelAlloc::Arena *LowLevelAlloc::NewArenaWithCustomAlloc(int32 flags,
-                                                             Arena *meta_data_arena,
-                                                             PagesAllocator *allocator) {
-  RAW_CHECK(meta_data_arena != 0, "must pass a valid arena");
-  if (meta_data_arena == &default_arena) {
-    if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
-      meta_data_arena = &unhooked_async_sig_safe_arena;
-    } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) {
-      meta_data_arena = &unhooked_arena;
-    }
-  }
-  // Arena(0) uses the constructor for non-static contexts
-  Arena *result =
-    new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0);
-  ArenaInit(result);
-  result->flags = flags;
-  if (allocator) {
-    result->allocator = allocator;
-  }
-  return result;
-}
-
-// L < arena->mu, L < arena->arena->mu
-bool LowLevelAlloc::DeleteArena(Arena *arena) {
-  RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena,
-            "may not delete default arena");
-  ArenaLock section(arena);
-  bool empty = (arena->allocation_count == 0);
-  section.Leave();
-  if (empty) {
-    while (arena->freelist.next[0] != 0) {
-      AllocList *region = arena->freelist.next[0];
-      size_t size = region->header.size;
-      arena->freelist.next[0] = region->next[0];
-      RAW_CHECK(region->header.magic ==
-                Magic(kMagicUnallocated, &region->header),
-                "bad magic number in DeleteArena()");
-      RAW_CHECK(region->header.arena == arena,
-                "bad arena pointer in DeleteArena()");
-      RAW_CHECK(size % arena->pagesize == 0,
-                "empty arena has non-page-aligned block size");
-      RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0,
-                "empty arena has non-page-aligned block");
-      int munmap_result;
-      if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
-        munmap_result = munmap(region, size);
-      } else {
-        munmap_result = MallocHook::UnhookedMUnmap(region, size);
-      }
-      RAW_CHECK(munmap_result == 0,
-                "LowLevelAlloc::DeleteArena:  munmap failed address");
-    }
-    Free(arena);
-  }
-  return empty;
-}
-
-// ---------------------------------------------------------------------------
-
-// Return value rounded up to next multiple of align.
-// align must be a power of two.
-static intptr_t RoundUp(intptr_t addr, intptr_t align) {
-  return (addr + align - 1) & ~(align - 1);
-}
-
-// Equivalent to "return prev->next[i]" but with sanity checking
-// that the freelist is in the correct order, that it
-// consists of regions marked "unallocated", and that no two regions
-// are adjacent in memory (they should have been coalesced).
-// L < arena->mu
-static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) {
-  RAW_CHECK(i < prev->levels, "too few levels in Next()");
-  AllocList *next = prev->next[i];
-  if (next != 0) {
-    RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header),
-              "bad magic number in Next()");
-    RAW_CHECK(next->header.arena == arena,
-              "bad arena pointer in Next()");
-    if (prev != &arena->freelist) {
-      RAW_CHECK(prev < next, "unordered freelist");
-      RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size <
-                reinterpret_cast<char *>(next), "malformed freelist");
-    }
-  }
-  return next;
-}
-
-// Coalesce list item "a" with its successor if they are adjacent.
-static void Coalesce(AllocList *a) {
-  AllocList *n = a->next[0];
-  if (n != 0 && reinterpret_cast<char *>(a) + a->header.size ==
-                    reinterpret_cast<char *>(n)) {
-    LowLevelAlloc::Arena *arena = a->header.arena;
-    a->header.size += n->header.size;
-    n->header.magic = 0;
-    n->header.arena = 0;
-    AllocList *prev[kMaxLevel];
-    LLA_SkiplistDelete(&arena->freelist, n, prev);
-    LLA_SkiplistDelete(&arena->freelist, a, prev);
-    a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true);
-    LLA_SkiplistInsert(&arena->freelist, a, prev);
-  }
-}
-
-// Adds block at location "v" to the free list
-// L >= arena->mu
-static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) {
-  AllocList *f = reinterpret_cast<AllocList *>(
-                        reinterpret_cast<char *>(v) - sizeof (f->header));
-  RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
-            "bad magic number in AddToFreelist()");
-  RAW_CHECK(f->header.arena == arena,
-            "bad arena pointer in AddToFreelist()");
-  f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true);
-  AllocList *prev[kMaxLevel];
-  LLA_SkiplistInsert(&arena->freelist, f, prev);
-  f->header.magic = Magic(kMagicUnallocated, &f->header);
-  Coalesce(f);                  // maybe coalesce with successor
-  Coalesce(prev[0]);            // maybe coalesce with predecessor
-}
-
-// Frees storage allocated by LowLevelAlloc::Alloc().
-// L < arena->mu
-void LowLevelAlloc::Free(void *v) {
-  if (v != 0) {
-    AllocList *f = reinterpret_cast<AllocList *>(
-                        reinterpret_cast<char *>(v) - sizeof (f->header));
-    RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
-              "bad magic number in Free()");
-    LowLevelAlloc::Arena *arena = f->header.arena;
-    if ((arena->flags & kCallMallocHook) != 0) {
-      MallocHook::InvokeDeleteHook(v);
-    }
-    ArenaLock section(arena);
-    AddToFreelist(v, arena);
-    RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free");
-    arena->allocation_count--;
-    section.Leave();
-  }
-}
-
-// allocates and returns a block of size bytes, to be freed with Free()
-// L < arena->mu
-static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
-  void *result = 0;
-  if (request != 0) {
-    AllocList *s;       // will point to region that satisfies request
-    ArenaLock section(arena);
-    ArenaInit(arena);
-    // round up with header
-    size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup);
-    for (;;) {      // loop until we find a suitable region
-      // find the minimum levels that a block of this size must have
-      int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1;
-      if (i < arena->freelist.levels) {   // potential blocks exist
-        AllocList *before = &arena->freelist;  // predecessor of s
-        while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) {
-          before = s;
-        }
-        if (s != 0) {       // we found a region
-          break;
-        }
-      }
-      // we unlock before mmap() both because mmap() may call a callback hook,
-      // and because it may be slow.
-      arena->mu.Unlock();
-      // mmap generous 64K chunks to decrease
-      // the chances/impact of fragmentation:
-      size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16);
-      void *new_pages = arena->allocator->MapPages(arena->flags, new_pages_size);
-      arena->mu.Lock();
-      s = reinterpret_cast<AllocList *>(new_pages);
-      s->header.size = new_pages_size;
-      // Pretend the block is allocated; call AddToFreelist() to free it.
-      s->header.magic = Magic(kMagicAllocated, &s->header);
-      s->header.arena = arena;
-      AddToFreelist(&s->levels, arena);  // insert new region into free list
-    }
-    AllocList *prev[kMaxLevel];
-    LLA_SkiplistDelete(&arena->freelist, s, prev);    // remove from free list
-    // s points to the first free region that's big enough
-    if (req_rnd + arena->min_size <= s->header.size) {  // big enough to split
-      AllocList *n = reinterpret_cast<AllocList *>
-                        (req_rnd + reinterpret_cast<char *>(s));
-      n->header.size = s->header.size - req_rnd;
-      n->header.magic = Magic(kMagicAllocated, &n->header);
-      n->header.arena = arena;
-      s->header.size = req_rnd;
-      AddToFreelist(&n->levels, arena);
-    }
-    s->header.magic = Magic(kMagicAllocated, &s->header);
-    RAW_CHECK(s->header.arena == arena, "");
-    arena->allocation_count++;
-    section.Leave();
-    result = &s->levels;
-  }
-  ANNOTATE_NEW_MEMORY(result, request);
-  return result;
-}
-
-void *LowLevelAlloc::Alloc(size_t request) {
-  void *result = DoAllocWithArena(request, &default_arena);
-  if ((default_arena.flags & kCallMallocHook) != 0) {
-    // this call must be directly in the user-called allocator function
-    // for MallocHook::GetCallerStackTrace to work properly
-    MallocHook::InvokeNewHook(result, request);
-  }
-  return result;
-}
-
-void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) {
-  RAW_CHECK(arena != 0, "must pass a valid arena");
-  void *result = DoAllocWithArena(request, arena);
-  if ((arena->flags & kCallMallocHook) != 0) {
-    // this call must be directly in the user-called allocator function
-    // for MallocHook::GetCallerStackTrace to work properly
-    MallocHook::InvokeNewHook(result, request);
-  }
-  return result;
-}
-
-LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() {
-  return &default_arena;
-}
-
-static DefaultPagesAllocator *default_pages_allocator;
-static union {
-  char chars[sizeof(DefaultPagesAllocator)];
-  void *ptr;
-} debug_pages_allocator_space;
-
-LowLevelAlloc::PagesAllocator *LowLevelAlloc::GetDefaultPagesAllocator(void) {
-  if (default_pages_allocator) {
-    return default_pages_allocator;
-  }
-  default_pages_allocator = new (debug_pages_allocator_space.chars) DefaultPagesAllocator();
-  return default_pages_allocator;
-}
-
-void *DefaultPagesAllocator::MapPages(int32 flags, size_t size) {
-  void *new_pages;
-  if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
-    new_pages = MallocHook::UnhookedMMap(0, size,
-                                         PROT_WRITE|PROT_READ,
-                                         MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-  } else {
-    new_pages = mmap(0, size,
-                     PROT_WRITE|PROT_READ,
-                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-  }
-  RAW_CHECK(new_pages != MAP_FAILED, "mmap error");
-
-  return new_pages;
-}
-
-void DefaultPagesAllocator::UnMapPages(int32 flags, void *region, size_t size) {
-  int munmap_result;
-  if ((flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
-    munmap_result = munmap(region, size);
-  } else {
-    munmap_result = MallocHook::UnhookedMUnmap(region, size);
-  }
-  RAW_CHECK(munmap_result == 0,
-            "LowLevelAlloc::DeleteArena: munmap failed address");
-}
diff --git a/contrib/libtcmalloc/src/base/low_level_alloc.h b/contrib/libtcmalloc/src/base/low_level_alloc.h
deleted file mode 100644
index 8a20dd8b870..00000000000
--- a/contrib/libtcmalloc/src/base/low_level_alloc.h
+++ /dev/null
@@ -1,120 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if !defined(_BASE_LOW_LEVEL_ALLOC_H_)
-#define _BASE_LOW_LEVEL_ALLOC_H_
-
-// A simple thread-safe memory allocator that does not depend on
-// mutexes or thread-specific data.  It is intended to be used
-// sparingly, and only when malloc() would introduce an unwanted
-// dependency, such as inside the heap-checker.
-
-#include "../config.h"
-#include <stddef.h>             // for size_t
-#include "base/basictypes.h"
-
-class LowLevelAlloc {
- public:
-  class PagesAllocator {
-  public:
-    virtual ~PagesAllocator();
-    virtual void *MapPages(int32 flags, size_t size) = 0;
-    virtual void UnMapPages(int32 flags, void *addr, size_t size) = 0;
-  };
-
-  static PagesAllocator *GetDefaultPagesAllocator(void);
-
-  struct Arena;       // an arena from which memory may be allocated
-
-  // Returns a pointer to a block of at least "request" bytes
-  // that have been newly allocated from the specific arena.
-  // for Alloc() call the DefaultArena() is used.
-  // Returns 0 if passed request==0.
-  // Does not return 0 under other circumstances; it crashes if memory
-  // is not available.
-  static void *Alloc(size_t request)
-    ATTRIBUTE_SECTION(malloc_hook);
-  static void *AllocWithArena(size_t request, Arena *arena)
-    ATTRIBUTE_SECTION(malloc_hook);
-
-  // Deallocates a region of memory that was previously allocated with
-  // Alloc().   Does nothing if passed 0.   "s" must be either 0,
-  // or must have been returned from a call to Alloc() and not yet passed to
-  // Free() since that call to Alloc().  The space is returned to the arena
-  // from which it was allocated.
-  static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook);
-
-    // ATTRIBUTE_SECTION(malloc_hook) for Alloc* and Free
-    // are to put all callers of MallocHook::Invoke* in this module
-    // into special section,
-    // so that MallocHook::GetCallerStackTrace can function accurately.
-
-  // Create a new arena.
-  // The root metadata for the new arena is allocated in the
-  // meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
-  // These values may be ored into flags:
-  enum {
-    // Report calls to Alloc() and Free() via the MallocHook interface.
-    // Set in the DefaultArena.
-    kCallMallocHook = 0x0001,
-
-    // Make calls to Alloc(), Free() be async-signal-safe.  Not set in
-    // DefaultArena().
-    kAsyncSignalSafe = 0x0002,
-
-    // When used with DefaultArena(), the NewArena() and DeleteArena() calls
-    // obey the flags given explicitly in the NewArena() call, even if those
-    // flags differ from the settings in DefaultArena().  So the call
-    // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe,
-    // as well as generatating an arena that provides async-signal-safe
-    // Alloc/Free.
-  };
-  static Arena *NewArena(int32 flags, Arena *meta_data_arena);
-
-  // note: pages allocator will never be destroyed and allocated pages will never be freed
-  // When allocator is NULL, it's same as NewArena
-  static Arena *NewArenaWithCustomAlloc(int32 flags, Arena *meta_data_arena, PagesAllocator *allocator);
-
-  // Destroys an arena allocated by NewArena and returns true,
-  // provided no allocated blocks remain in the arena.
-  // If allocated blocks remain in the arena, does nothing and
-  // returns false.
-  // It is illegal to attempt to destroy the DefaultArena().
-  static bool DeleteArena(Arena *arena);
-
-  // The default arena that always exists.
-  static Arena *DefaultArena();
-
- private:
-  LowLevelAlloc();      // no instances
-};
-
-#endif
diff --git a/contrib/libtcmalloc/src/base/simple_mutex.h b/contrib/libtcmalloc/src/base/simple_mutex.h
deleted file mode 100644
index e57c1079283..00000000000
--- a/contrib/libtcmalloc/src/base/simple_mutex.h
+++ /dev/null
@@ -1,332 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2007, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
-// ---
-// Author: Craig Silverstein.
-//
-// A simple mutex wrapper, supporting locks and read-write locks.
-// You should assume the locks are *not* re-entrant.
-//
-// To use: you should define the following macros in your configure.ac:
-//   ACX_PTHREAD
-//   AC_RWLOCK
-// The latter is defined in ../autoconf.
-//
-// This class is meant to be internal-only and should be wrapped by an
-// internal namespace.  Before you use this module, please give the
-// name of your internal namespace for this module.  Or, if you want
-// to expose it, you'll want to move it to the Google namespace.  We
-// cannot put this class in global namespace because there can be some
-// problems when we have multiple versions of Mutex in each shared object.
-//
-// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG
-//       mode.
-//
-// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy:
-//    http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html
-// Because of that, we might as well use windows locks for
-// cygwin.  They seem to be more reliable than the cygwin pthreads layer.
-//
-// TRICKY IMPLEMENTATION NOTE:
-// This class is designed to be safe to use during
-// dynamic-initialization -- that is, by global constructors that are
-// run before main() starts.  The issue in this case is that
-// dynamic-initialization happens in an unpredictable order, and it
-// could be that someone else's dynamic initializer could call a
-// function that tries to acquire this mutex -- but that all happens
-// before this mutex's constructor has run.  (This can happen even if
-// the mutex and the function that uses the mutex are in the same .cc
-// file.)  Basically, because Mutex does non-trivial work in its
-// constructor, it's not, in the naive implementation, safe to use
-// before dynamic initialization has run on it.
-//
-// The solution used here is to pair the actual mutex primitive with a
-// bool that is set to true when the mutex is dynamically initialized.
-// (Before that it's false.)  Then we modify all mutex routines to
-// look at the bool, and not try to lock/unlock until the bool makes
-// it to true (which happens after the Mutex constructor has run.)
-//
-// This works because before main() starts -- particularly, during
-// dynamic initialization -- there are no threads, so a) it's ok that
-// the mutex operations are a no-op, since we don't need locking then
-// anyway; and b) we can be quite confident our bool won't change
-// state between a call to Lock() and a call to Unlock() (that would
-// require a global constructor in one translation unit to call Lock()
-// and another global constructor in another translation unit to call
-// Unlock() later, which is pretty perverse).
-//
-// That said, it's tricky, and can conceivably fail; it's safest to
-// avoid trying to acquire a mutex in a global constructor, if you
-// can.  One way it can fail is that a really smart compiler might
-// initialize the bool to true at static-initialization time (too
-// early) rather than at dynamic-initialization time.  To discourage
-// that, we set is_safe_ to true in code (not the constructor
-// colon-initializer) and set it to true via a function that always
-// evaluates to true, but that the compiler can't know always
-// evaluates to true.  This should be good enough.
-//
-// A related issue is code that could try to access the mutex
-// after it's been destroyed in the global destructors (because
-// the Mutex global destructor runs before some other global
-// destructor, that tries to acquire the mutex).  The way we
-// deal with this is by taking a constructor arg that global
-// mutexes should pass in, that causes the destructor to do no
-// work.  We still depend on the compiler not doing anything
-// weird to a Mutex's memory after it is destroyed, but for a
-// static global variable, that's pretty safe.
-
-#ifndef GOOGLE_MUTEX_H_
-#define GOOGLE_MUTEX_H_
-
-#include "../config.h"
-
-#if defined(NO_THREADS)
-  typedef int MutexType;      // to keep a lock-count
-#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-# ifndef WIN32_LEAN_AND_MEAN
-#   define WIN32_LEAN_AND_MEAN  // We only need minimal includes
-# endif
-  // We need Windows NT or later for TryEnterCriticalSection().  If you
-  // don't need that functionality, you can remove these _WIN32_WINNT
-  // lines, and change TryLock() to assert(0) or something.
-# ifndef _WIN32_WINNT
-#   define _WIN32_WINNT 0x0400
-# endif
-# include <windows.h>
-  typedef CRITICAL_SECTION MutexType;
-#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
-  // Needed for pthread_rwlock_*.  If it causes problems, you could take it
-  // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
-  // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
-  // for locking there.)
-# ifdef __linux__
-#   define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
-# endif
-# include <pthread.h>
-  typedef pthread_rwlock_t MutexType;
-#elif defined(HAVE_PTHREAD)
-# include <pthread.h>
-  typedef pthread_mutex_t MutexType;
-#else
-# error Need to implement mutex.h for your architecture, or #define NO_THREADS
-#endif
-
-#include <assert.h>
-#include <stdlib.h>      // for abort()
-
-#define MUTEX_NAMESPACE perftools_mutex_namespace
-
-namespace MUTEX_NAMESPACE {
-
-class Mutex {
- public:
-  // This is used for the single-arg constructor
-  enum LinkerInitialized { LINKER_INITIALIZED };
-
-  // Create a Mutex that is not held by anybody.  This constructor is
-  // typically used for Mutexes allocated on the heap or the stack.
-  inline Mutex();
-  // This constructor should be used for global, static Mutex objects.
-  // It inhibits work being done by the destructor, which makes it
-  // safer for code that tries to acqiure this mutex in their global
-  // destructor.
-  inline Mutex(LinkerInitialized);
-
-  // Destructor
-  inline ~Mutex();
-
-  inline void Lock();    // Block if needed until free then acquire exclusively
-  inline void Unlock();  // Release a lock acquired via Lock()
-  inline bool TryLock(); // If free, Lock() and return true, else return false
-  // Note that on systems that don't support read-write locks, these may
-  // be implemented as synonyms to Lock() and Unlock().  So you can use
-  // these for efficiency, but don't use them anyplace where being able
-  // to do shared reads is necessary to avoid deadlock.
-  inline void ReaderLock();   // Block until free or shared then acquire a share
-  inline void ReaderUnlock(); // Release a read share of this Mutex
-  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
-  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
-
- private:
-  MutexType mutex_;
-  // We want to make sure that the compiler sets is_safe_ to true only
-  // when we tell it to, and never makes assumptions is_safe_ is
-  // always true.  volatile is the most reliable way to do that.
-  volatile bool is_safe_;
-  // This indicates which constructor was called.
-  bool destroy_;
-
-  inline void SetIsSafe() { is_safe_ = true; }
-
-  // Catch the error of writing Mutex when intending MutexLock.
-  Mutex(Mutex* /*ignored*/) {}
-  // Disallow "evil" constructors
-  Mutex(const Mutex&);
-  void operator=(const Mutex&);
-};
-
-// Now the implementation of Mutex for various systems
-#if defined(NO_THREADS)
-
-// When we don't have threads, we can be either reading or writing,
-// but not both.  We can have lots of readers at once (in no-threads
-// mode, that's most likely to happen in recursive function calls),
-// but only one writer.  We represent this by having mutex_ be -1 when
-// writing and a number > 0 when reading (and 0 when no lock is held).
-//
-// In debug mode, we assert these invariants, while in non-debug mode
-// we do nothing, for efficiency.  That's why everything is in an
-// assert.
-
-Mutex::Mutex() : mutex_(0) { }
-Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { }
-Mutex::~Mutex()            { assert(mutex_ == 0); }
-void Mutex::Lock()         { assert(--mutex_ == -1); }
-void Mutex::Unlock()       { assert(mutex_++ == -1); }
-bool Mutex::TryLock()      { if (mutex_) return false; Lock(); return true; }
-void Mutex::ReaderLock()   { assert(++mutex_ > 0); }
-void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
-
-#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-
-Mutex::Mutex() : destroy_(true) {
-  InitializeCriticalSection(&mutex_);
-  SetIsSafe();
-}
-Mutex::Mutex(LinkerInitialized) : destroy_(false) {
-  InitializeCriticalSection(&mutex_);
-  SetIsSafe();
-}
-Mutex::~Mutex()            { if (destroy_) DeleteCriticalSection(&mutex_); }
-void Mutex::Lock()         { if (is_safe_) EnterCriticalSection(&mutex_); }
-void Mutex::Unlock()       { if (is_safe_) LeaveCriticalSection(&mutex_); }
-bool Mutex::TryLock()      { return is_safe_ ?
-                                 TryEnterCriticalSection(&mutex_) != 0 : true; }
-void Mutex::ReaderLock()   { Lock(); }      // we don't have read-write locks
-void Mutex::ReaderUnlock() { Unlock(); }
-
-#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
-
-#define SAFE_PTHREAD(fncall)  do {   /* run fncall if is_safe_ is true */  \
-  if (is_safe_ && fncall(&mutex_) != 0) abort();                           \
-} while (0)
-
-Mutex::Mutex() : destroy_(true) {
-  SetIsSafe();
-  if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
-}
-Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) {
-  SetIsSafe();
-  if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
-}
-Mutex::~Mutex()       { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); }
-void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock); }
-void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock); }
-bool Mutex::TryLock()      { return is_safe_ ?
-                               pthread_rwlock_trywrlock(&mutex_) == 0 : true; }
-void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock); }
-void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
-#undef SAFE_PTHREAD
-
-#elif defined(HAVE_PTHREAD)
-
-#define SAFE_PTHREAD(fncall)  do {   /* run fncall if is_safe_ is true */  \
-  if (is_safe_ && fncall(&mutex_) != 0) abort();                           \
-} while (0)
-
-Mutex::Mutex() : destroy_(true) {
-  SetIsSafe();
-  if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
-}
-Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) {
-  SetIsSafe();
-  if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
-}
-Mutex::~Mutex()       { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); }
-void Mutex::Lock()         { SAFE_PTHREAD(pthread_mutex_lock); }
-void Mutex::Unlock()       { SAFE_PTHREAD(pthread_mutex_unlock); }
-bool Mutex::TryLock()      { return is_safe_ ?
-                                 pthread_mutex_trylock(&mutex_) == 0 : true; }
-void Mutex::ReaderLock()   { Lock(); }
-void Mutex::ReaderUnlock() { Unlock(); }
-#undef SAFE_PTHREAD
-
-#endif
-
-// --------------------------------------------------------------------------
-// Some helper classes
-
-// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
-class MutexLock {
- public:
-  explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
-  ~MutexLock() { mu_->Unlock(); }
- private:
-  Mutex * const mu_;
-  // Disallow "evil" constructors
-  MutexLock(const MutexLock&);
-  void operator=(const MutexLock&);
-};
-
-// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
-class ReaderMutexLock {
- public:
-  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
-  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
- private:
-  Mutex * const mu_;
-  // Disallow "evil" constructors
-  ReaderMutexLock(const ReaderMutexLock&);
-  void operator=(const ReaderMutexLock&);
-};
-
-class WriterMutexLock {
- public:
-  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
-  ~WriterMutexLock() { mu_->WriterUnlock(); }
- private:
-  Mutex * const mu_;
-  // Disallow "evil" constructors
-  WriterMutexLock(const WriterMutexLock&);
-  void operator=(const WriterMutexLock&);
-};
-
-// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
-#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
-#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
-#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
-
-}  // namespace MUTEX_NAMESPACE
-
-using namespace MUTEX_NAMESPACE;
-
-#undef MUTEX_NAMESPACE
-
-#endif  /* #define GOOGLE_SIMPLE_MUTEX_H_ */
diff --git a/contrib/libtcmalloc/src/base/spinlock.cc b/contrib/libtcmalloc/src/base/spinlock.cc
deleted file mode 100644
index 48bb163d1de..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-#include "../config.h"
-#include "base/spinlock.h"
-#include "base/spinlock_internal.h"
-#include "base/sysinfo.h"   /* for GetSystemCPUsCount() */
-
-// NOTE on the Lock-state values:
-//
-// kSpinLockFree represents the unlocked state
-// kSpinLockHeld represents the locked state with no waiters
-// kSpinLockSleeper represents the locked state with waiters
-
-static int adaptive_spin_count = 0;
-
-const base::LinkerInitialized SpinLock::LINKER_INITIALIZED =
-    base::LINKER_INITIALIZED;
-
-namespace {
-struct SpinLock_InitHelper {
-  SpinLock_InitHelper() {
-    // On multi-cpu machines, spin for longer before yielding
-    // the processor or sleeping.  Reduces idle time significantly.
-    if (GetSystemCPUsCount() > 1) {
-      adaptive_spin_count = 1000;
-    }
-  }
-};
-
-// Hook into global constructor execution:
-// We do not do adaptive spinning before that,
-// but nothing lock-intensive should be going on at that time.
-static SpinLock_InitHelper init_helper;
-
-inline void SpinlockPause(void) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-  __asm__ __volatile__("rep; nop" : : );
-#endif
-}
-
-}  // unnamed namespace
-
-// Monitor the lock to see if its value changes within some time
-// period (adaptive_spin_count loop iterations). The last value read
-// from the lock is returned from the method.
-Atomic32 SpinLock::SpinLoop() {
-  int c = adaptive_spin_count;
-  while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) {
-    SpinlockPause();
-  }
-  return base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
-                                              kSpinLockSleeper);
-}
-
-void SpinLock::SlowLock() {
-  Atomic32 lock_value = SpinLoop();
-
-  int lock_wait_call_count = 0;
-  while (lock_value != kSpinLockFree) {
-    // If the lock is currently held, but not marked as having a sleeper, mark
-    // it as having a sleeper.
-    if (lock_value == kSpinLockHeld) {
-      // Here, just "mark" that the thread is going to sleep.  Don't store the
-      // lock wait time in the lock as that will cause the current lock
-      // owner to think it experienced contention.
-      lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
-                                                        kSpinLockHeld,
-                                                        kSpinLockSleeper);
-      if (lock_value == kSpinLockHeld) {
-        // Successfully transitioned to kSpinLockSleeper.  Pass
-        // kSpinLockSleeper to the SpinLockDelay routine to properly indicate
-        // the last lock_value observed.
-        lock_value = kSpinLockSleeper;
-      } else if (lock_value == kSpinLockFree) {
-        // Lock is free again, so try and acquire it before sleeping.  The
-        // new lock state will be the number of cycles this thread waited if
-        // this thread obtains the lock.
-        lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_,
-                                                          kSpinLockFree,
-                                                          kSpinLockSleeper);
-        continue;  // skip the delay at the end of the loop
-      }
-    }
-
-    // Wait for an OS specific delay.
-    base::internal::SpinLockDelay(&lockword_, lock_value,
-                                  ++lock_wait_call_count);
-    // Spin again after returning from the wait routine to give this thread
-    // some chance of obtaining the lock.
-    lock_value = SpinLoop();
-  }
-}
-
-void SpinLock::SlowUnlock() {
-  // wake waiter if necessary
-  base::internal::SpinLockWake(&lockword_, false);
-}
diff --git a/contrib/libtcmalloc/src/base/spinlock.h b/contrib/libtcmalloc/src/base/spinlock.h
deleted file mode 100644
index 42a4eb906a0..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock.h
+++ /dev/null
@@ -1,143 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- */
-
-// SpinLock is async signal safe.
-// If used within a signal handler, all lock holders
-// should block the signal even outside the signal handler.
-
-#ifndef BASE_SPINLOCK_H_
-#define BASE_SPINLOCK_H_
-
-#include "../config.h"
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include "base/dynamic_annotations.h"
-#include "base/thread_annotations.h"
-
-class LOCKABLE SpinLock {
- public:
-  SpinLock() : lockword_(kSpinLockFree) { }
-
-  // Special constructor for use with static SpinLock objects.  E.g.,
-  //
-  //    static SpinLock lock(base::LINKER_INITIALIZED);
-  //
-  // When intialized using this constructor, we depend on the fact
-  // that the linker has already initialized the memory appropriately.
-  // A SpinLock constructed like this can be freely used from global
-  // initializers without worrying about the order in which global
-  // initializers run.
-  explicit SpinLock(base::LinkerInitialized /*x*/) {
-    // Does nothing; lockword_ is already initialized
-  }
-
-  // Acquire this SpinLock.
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  //                 support this macro with 0 args (see thread_annotations.h)
-  inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ {
-    if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
-                                             kSpinLockHeld) != kSpinLockFree) {
-      SlowLock();
-    }
-    ANNOTATE_RWLOCK_ACQUIRED(this, 1);
-  }
-
-  // Try to acquire this SpinLock without blocking and return true if the
-  // acquisition was successful.  If the lock was not acquired, false is
-  // returned.  If this SpinLock is free at the time of the call, TryLock
-  // will return true with high probability.
-  inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
-    bool res =
-        (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree,
-                                              kSpinLockHeld) == kSpinLockFree);
-    if (res) {
-      ANNOTATE_RWLOCK_ACQUIRED(this, 1);
-    }
-    return res;
-  }
-
-  // Release this SpinLock, which must be held by the calling thread.
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  //                 support this macro with 0 args (see thread_annotations.h)
-  inline void Unlock() /*UNLOCK_FUNCTION()*/ {
-    ANNOTATE_RWLOCK_RELEASED(this, 1);
-    uint64 prev_value = static_cast<uint64>(
-        base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
-    if (prev_value != kSpinLockHeld) {
-      // Speed the wakeup of any waiter.
-      SlowUnlock();
-    }
-  }
-
-  // Determine if the lock is held.  When the lock is held by the invoking
-  // thread, true will always be returned. Intended to be used as
-  // CHECK(lock.IsHeld()).
-  inline bool IsHeld() const {
-    return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree;
-  }
-
-  static const base::LinkerInitialized LINKER_INITIALIZED;  // backwards compat
- private:
-  enum { kSpinLockFree = 0 };
-  enum { kSpinLockHeld = 1 };
-  enum { kSpinLockSleeper = 2 };
-
-  volatile Atomic32 lockword_;
-
-  void SlowLock();
-  void SlowUnlock();
-  Atomic32 SpinLoop();
-
-  DISALLOW_COPY_AND_ASSIGN(SpinLock);
-};
-
-// Corresponding locker object that arranges to acquire a spinlock for
-// the duration of a C++ scope.
-class SCOPED_LOCKABLE SpinLockHolder {
- private:
-  SpinLock* lock_;
- public:
-  inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l)
-      : lock_(l) {
-    l->Lock();
-  }
-  // TODO(csilvers): uncomment the annotation when we figure out how to
-  //                 support this macro with 0 args (see thread_annotations.h)
-  inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); }
-};
-// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock);
-#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name)
-
-
-#endif  // BASE_SPINLOCK_H_
diff --git a/contrib/libtcmalloc/src/base/spinlock_internal.cc b/contrib/libtcmalloc/src/base/spinlock_internal.cc
deleted file mode 100644
index d9629717be1..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_internal.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// The OS-specific header included below must provide two calls:
-// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
-// See spinlock_internal.h for the spec of SpinLockWake().
-
-// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
-// SpinLockDelay() generates an apprproate spin delay on iteration "loop" of a
-// spin loop on location *w, whose previously observed value was "value".
-// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
-// or may wait for a delay that can be truncated by a call to SpinlockWake(w).
-// In all cases, it must return in bounded time even if SpinlockWake() is not
-// called.
-
-#include "base/spinlock_internal.h"
-
-// forward declaration for use by spinlock_*-inl.h
-namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
-
-#if defined(_WIN32)
-#include "base/spinlock_win32-inl.h"
-#elif defined(__linux__)
-#include "base/spinlock_linux-inl.h"
-#else
-#include "base/spinlock_posix-inl.h"
-#endif
-
-namespace base {
-namespace internal {
-
-// Return a suggested delay in nanoseconds for iteration number "loop"
-static int SuggestedDelayNS(int loop) {
-  // Weak pseudo-random number generator to get some spread between threads
-  // when many are spinning.
-#ifdef BASE_HAS_ATOMIC64
-  static base::subtle::Atomic64 rand;
-  uint64 r = base::subtle::NoBarrier_Load(&rand);
-  r = 0x5deece66dLL * r + 0xb;   // numbers from nrand48()
-  base::subtle::NoBarrier_Store(&rand, r);
-
-  r <<= 16;   // 48-bit random number now in top 48-bits.
-  if (loop < 0 || loop > 32) {   // limit loop to 0..32
-    loop = 32;
-  }
-  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
-  // Select top 20..24 bits of lower 48 bits,
-  // giving approximately 0ms to 16ms.
-  // Mean is exponential in loop for first 32 iterations, then 8ms.
-  // The futex path multiplies this by 16, since we expect explicit wakeups
-  // almost always on that path.
-  return r >> (44 - (loop >> 3));
-#else
-  static Atomic32 rand;
-  uint32 r = base::subtle::NoBarrier_Load(&rand);
-  r = 0x343fd * r + 0x269ec3;   // numbers from MSVC++
-  base::subtle::NoBarrier_Store(&rand, r);
-
-  r <<= 1;   // 31-bit random number now in top 31-bits.
-  if (loop < 0 || loop > 32) {   // limit loop to 0..32
-    loop = 32;
-  }
-  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
-  // Select top 20..24 bits of lower 31 bits,
-  // giving approximately 0ms to 16ms.
-  // Mean is exponential in loop for first 32 iterations, then 8ms.
-  // The futex path multiplies this by 16, since we expect explicit wakeups
-  // almost always on that path.
-  return r >> (12 - (loop >> 3));
-#endif
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_internal.h b/contrib/libtcmalloc/src/base/spinlock_internal.h
deleted file mode 100644
index 636885cd6e5..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_internal.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is an internal part spinlock.cc and once.cc
- * It may not be used directly by code outside of //base.
- */
-
-#ifndef BASE_SPINLOCK_INTERNAL_H_
-#define BASE_SPINLOCK_INTERNAL_H_
-
-#include "../config.h"
-#include "base/basictypes.h"
-#include "base/atomicops.h"
-
-namespace base {
-namespace internal {
-
-void SpinLockWake(volatile Atomic32 *w, bool all);
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop);
-
-} // namespace internal
-} // namespace base
-#endif
diff --git a/contrib/libtcmalloc/src/base/spinlock_linux-inl.h b/contrib/libtcmalloc/src/base/spinlock_linux-inl.h
deleted file mode 100644
index aadf62a4b67..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_linux-inl.h
+++ /dev/null
@@ -1,101 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Linux-specific part of spinlock_internal.cc
- */
-
-#include <errno.h>
-#include <sched.h>
-#include <time.h>
-#include <limits.h>
-#include "base/linux_syscall_support.h"
-
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-#define FUTEX_PRIVATE_FLAG 128
-
-static bool have_futex;
-static int futex_private_flag = FUTEX_PRIVATE_FLAG;
-
-namespace {
-static struct InitModule {
-  InitModule() {
-    int x = 0;
-    // futexes are ints, so we can use them only when
-    // that's the same size as the lockword_ in SpinLock.
-    have_futex = (sizeof (Atomic32) == sizeof (int) &&
-                  sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0);
-    if (have_futex &&
-        sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) {
-      futex_private_flag = 0;
-    }
-  }
-} init_module;
-
-}  // anonymous namespace
-
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  if (loop != 0) {
-    int save_errno = errno;
-    struct timespec tm;
-    tm.tv_sec = 0;
-    if (have_futex) {
-      tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
-    } else {
-      tm.tv_nsec = 2000001;   // above 2ms so linux 2.4 doesn't spin
-    }
-    if (have_futex) {
-      tm.tv_nsec *= 16;  // increase the delay; we expect explicit wakeups
-      sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
-                FUTEX_WAIT | futex_private_flag,
-                value, reinterpret_cast<struct kernel_timespec *>(&tm),
-                NULL, 0);
-    } else {
-      nanosleep(&tm, NULL);
-    }
-    errno = save_errno;
-  }
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-  if (have_futex) {
-    sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
-              FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1,
-              NULL, NULL, 0);
-  }
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_posix-inl.h b/contrib/libtcmalloc/src/base/spinlock_posix-inl.h
deleted file mode 100644
index 2695b7b1bb9..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_posix-inl.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Posix-specific part of spinlock_internal.cc
- */
-
-#include "../config.h"
-#include <errno.h>
-#ifdef HAVE_SCHED_H
-#include <sched.h>      /* For sched_yield() */
-#endif
-#include <time.h>       /* For nanosleep() */
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  int save_errno = errno;
-  if (loop == 0) {
-  } else if (loop == 1) {
-    sched_yield();
-  } else {
-    struct timespec tm;
-    tm.tv_sec = 0;
-    tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
-    nanosleep(&tm, NULL);
-  }
-  errno = save_errno;
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/spinlock_win32-inl.h b/contrib/libtcmalloc/src/base/spinlock_win32-inl.h
deleted file mode 100644
index 956b9653e6d..00000000000
--- a/contrib/libtcmalloc/src/base/spinlock_win32-inl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2009, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * This file is a Win32-specific part of spinlock_internal.cc
- */
-
-
-#include <windows.h>
-
-namespace base {
-namespace internal {
-
-void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
-  if (loop == 0) {
-  } else if (loop == 1) {
-    Sleep(0);
-  } else {
-    Sleep(base::internal::SuggestedDelayNS(loop) / 1000000);
-  }
-}
-
-void SpinLockWake(volatile Atomic32 *w, bool all) {
-}
-
-} // namespace internal
-} // namespace base
diff --git a/contrib/libtcmalloc/src/base/stl_allocator.h b/contrib/libtcmalloc/src/base/stl_allocator.h
deleted file mode 100644
index 4520713622f..00000000000
--- a/contrib/libtcmalloc/src/base/stl_allocator.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-
-#ifndef BASE_STL_ALLOCATOR_H_
-#define BASE_STL_ALLOCATOR_H_
-
-#include "../config.h"
-
-#include <stddef.h>   // for ptrdiff_t
-#include <limits>
-
-#include "base/logging.h"
-
-// Generic allocator class for STL objects
-// that uses a given type-less allocator Alloc, which must provide:
-//   static void* Alloc::Allocate(size_t size);
-//   static void Alloc::Free(void* ptr, size_t size);
-//
-// STL_Allocator<T, MyAlloc> provides the same thread-safety
-// guarantees as MyAlloc.
-//
-// Usage example:
-//   set<T, less<T>, STL_Allocator<T, MyAlloc> > my_set;
-// CAVEAT: Parts of the code below are probably specific
-//         to the STL version(s) we are using.
-//         The code is simply lifted from what std::allocator<> provides.
-template <typename T, class Alloc>
-class STL_Allocator {
- public:
-  typedef size_t     size_type;
-  typedef ptrdiff_t  difference_type;
-  typedef T*         pointer;
-  typedef const T*   const_pointer;
-  typedef T&         reference;
-  typedef const T&   const_reference;
-  typedef T          value_type;
-
-  template <class T1> struct rebind {
-    typedef STL_Allocator<T1, Alloc> other;
-  };
-
-  STL_Allocator() { }
-  STL_Allocator(const STL_Allocator&) { }
-  template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { }
-  ~STL_Allocator() { }
-
-  pointer address(reference x) const { return &x; }
-  const_pointer address(const_reference x) const { return &x; }
-
-  pointer allocate(size_type n, const void* = 0) {
-    RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
-    return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
-  }
-  void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
-
-  size_type max_size() const { return size_t(-1) / sizeof(T); }
-
-  void construct(pointer p, const T& val) { ::new(p) T(val); }
-  void construct(pointer p) { ::new(p) T(); }
-  void destroy(pointer p) { p->~T(); }
-
-  // There's no state, so these allocators are always equal
-  bool operator==(const STL_Allocator&) const { return true; }
-};
-
-#endif  // BASE_STL_ALLOCATOR_H_
diff --git a/contrib/libtcmalloc/src/base/sysinfo.cc b/contrib/libtcmalloc/src/base/sysinfo.cc
deleted file mode 100644
index 75217e6795a..00000000000
--- a/contrib/libtcmalloc/src/base/sysinfo.cc
+++ /dev/null
@@ -1,860 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "../config.h"
-#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
-# define PLATFORM_WINDOWS 1
-#endif
-
-#include <ctype.h>    // for isspace()
-#include <stdlib.h>   // for getenv()
-#include <stdio.h>    // for snprintf(), sscanf()
-#include <string.h>   // for memmove(), memchr(), etc.
-#include <fcntl.h>    // for open()
-#include <errno.h>    // for errno
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>   // for read()
-#endif
-#if defined __MACH__          // Mac OS X, almost certainly
-#include <mach-o/dyld.h>      // for iterating over dll's in ProcMapsIter
-#include <mach-o/loader.h>    // for iterating over dll's in ProcMapsIter
-#include <sys/types.h>
-#include <sys/sysctl.h>       // how we figure out numcpu's on OS X
-#elif defined __FreeBSD__
-#include <sys/sysctl.h>
-#elif defined __sun__         // Solaris
-#include <procfs.h>           // for, e.g., prmap_t
-#elif defined(PLATFORM_WINDOWS)
-#include <process.h>          // for getpid() (actually, _getpid())
-#include <shlwapi.h>          // for SHGetValueA()
-#include <tlhelp32.h>         // for Module32First()
-#endif
-#include "base/sysinfo.h"
-#include "base/commandlineflags.h"
-#include "base/dynamic_annotations.h"   // for RunningOnValgrind
-#include "base/logging.h"
-
-#ifdef PLATFORM_WINDOWS
-#ifdef MODULEENTRY32
-// In a change from the usual W-A pattern, there is no A variant of
-// MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
-// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be
-// MODULEENTRY32W.  These #undefs are the only way I see to get back
-// access to the original, ascii struct (and related functions).
-#undef MODULEENTRY32
-#undef Module32First
-#undef Module32Next
-#undef PMODULEENTRY32
-#undef LPMODULEENTRY32
-#endif  /* MODULEENTRY32 */
-// MinGW doesn't seem to define this, perhaps some windowsen don't either.
-#ifndef TH32CS_SNAPMODULE32
-#define TH32CS_SNAPMODULE32  0
-#endif  /* TH32CS_SNAPMODULE32 */
-#endif  /* PLATFORM_WINDOWS */
-
-// Re-run fn until it doesn't cause EINTR.
-#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
-
-// open/read/close can set errno, which may be illegal at this
-// time, so prefer making the syscalls directly if we can.
-#ifdef HAVE_SYS_SYSCALL_H
-# include <sys/syscall.h>
-#endif
-#ifdef SYS_open   // solaris 11, at least sometimes, only defines SYS_openat
-# define safeopen(filename, mode)  syscall(SYS_open, filename, mode)
-#else
-# define safeopen(filename, mode)  open(filename, mode)
-#endif
-#ifdef SYS_read
-# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
-#else
-# define saferead(fd, buffer, size)  read(fd, buffer, size)
-#endif
-#ifdef SYS_close
-# define safeclose(fd)  syscall(SYS_close, fd)
-#else
-# define safeclose(fd)  close(fd)
-#endif
-
-// ----------------------------------------------------------------------
-// GetenvBeforeMain()
-// GetUniquePathFromEnv()
-//    Some non-trivial getenv-related functions.
-// ----------------------------------------------------------------------
-
-// It's not safe to call getenv() in the malloc hooks, because they
-// might be called extremely early, before libc is done setting up
-// correctly.  In particular, the thread library may not be done
-// setting up errno.  So instead, we use the built-in __environ array
-// if it exists, and otherwise read /proc/self/environ directly, using
-// system calls to read the file, and thus avoid setting errno.
-// /proc/self/environ has a limit of how much data it exports (around
-// 8K), so it's not an ideal solution.
-const char* GetenvBeforeMain(const char* name) {
-#if defined(HAVE___ENVIRON)   // if we have it, it's declared in unistd.h
-  if (__environ) {            // can exist but be NULL, if statically linked
-    const int namelen = strlen(name);
-    for (char** p = __environ; *p; p++) {
-      if (strlen(*p) < namelen) {
-        continue;
-      }
-      if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=')  // it's a match
-        return *p + namelen+1;                                 // point after =
-    }
-    return NULL;
-  }
-#endif
-#if defined(PLATFORM_WINDOWS)
-  // TODO(mbelshe) - repeated calls to this function will overwrite the
-  // contents of the static buffer.
-  static char envvar_buf[1024];  // enough to hold any envvar we care about
-  if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
-    return NULL;
-  return envvar_buf;
-#endif
-  // static is ok because this function should only be called before
-  // main(), when we're single-threaded.
-  static char envbuf[16<<10];
-  if (*envbuf == '\0') {    // haven't read the environ yet
-    int fd = safeopen("/proc/self/environ", O_RDONLY);
-    // The -2 below guarantees the last two bytes of the buffer will be \0\0
-    if (fd == -1 ||           // unable to open the file, fall back onto libc
-        saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file
-      RAW_VLOG(1, "Unable to open /proc/self/environ, falling back "
-               "on getenv(\"%s\"), which may not work", name);
-      if (fd != -1) safeclose(fd);
-      return getenv(name);
-    }
-    safeclose(fd);
-  }
-  const int namelen = strlen(name);
-  const char* p = envbuf;
-  while (*p != '\0') {    // will happen at the \0\0 that terminates the buffer
-    // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
-    const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf));
-    if (endp == NULL)            // this entry isn't NUL terminated
-      return NULL;
-    else if (!memcmp(p, name, namelen) && p[namelen] == '=')    // it's a match
-      return p + namelen+1;      // point after =
-    p = endp + 1;
-  }
-  return NULL;                   // env var never found
-}
-
-extern "C" {
-  const char* TCMallocGetenvSafe(const char* name) {
-    return GetenvBeforeMain(name);
-  }
-}
-
-// This takes as an argument an environment-variable name (like
-// CPUPROFILE) whose value is supposed to be a file-path, and sets
-// path to that path, and returns true.  If the env var doesn't exist,
-// or is the empty string, leave path unchanged and returns false.
-// The reason this is non-trivial is that this function handles munged
-// pathnames.  Here's why:
-//
-// If we're a child process of the 'main' process, we can't just use
-// getenv("CPUPROFILE") -- the parent process will be using that path.
-// Instead we append our pid to the pathname.  How do we tell if we're a
-// child process?  Ideally we'd set an environment variable that all
-// our children would inherit.  But -- and this is seemingly a bug in
-// gcc -- if you do a setenv() in a shared libarary in a global
-// constructor, the environment setting is lost by the time main() is
-// called.  The only safe thing we can do in such a situation is to
-// modify the existing envvar.  So we do a hack: in the parent, we set
-// the high bit of the 1st char of CPUPROFILE.  In the child, we
-// notice the high bit is set and append the pid().  This works
-// assuming cpuprofile filenames don't normally have the high bit set
-// in their first character!  If that assumption is violated, we'll
-// still get a profile, but one with an unexpected name.
-// TODO(csilvers): set an envvar instead when we can do it reliably.
-bool GetUniquePathFromEnv(const char* env_name, char* path) {
-  char* envval = getenv(env_name);
-  if (envval == NULL || *envval == '\0')
-    return false;
-  if (envval[0] & 128) {                  // high bit is set
-    snprintf(path, PATH_MAX, "%c%s_%u",   // add pid and clear high bit
-             envval[0] & 127, envval+1, (unsigned int)(getpid()));
-  } else {
-    snprintf(path, PATH_MAX, "%s", envval);
-    envval[0] |= 128;                     // set high bit for kids to see
-  }
-  return true;
-}
-
-void SleepForMilliseconds(int milliseconds) {
-#ifdef PLATFORM_WINDOWS
-  _sleep(milliseconds);   // Windows's _sleep takes milliseconds argument
-#else
-  // Sleep for a few milliseconds
-  struct timespec sleep_time;
-  sleep_time.tv_sec = milliseconds / 1000;
-  sleep_time.tv_nsec = (milliseconds % 1000) * 1000000;
-  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
-    ;  // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-int GetSystemCPUsCount()
-{
-#if defined(PLATFORM_WINDOWS)
-  // Get the number of processors.
-  SYSTEM_INFO info;
-  GetSystemInfo(&info);
-  return  info.dwNumberOfProcessors;
-#else
-  long rv = sysconf(_SC_NPROCESSORS_ONLN);
-  if (rv < 0) {
-    return 1;
-  }
-  return static_cast<int>(rv);
-#endif
-}
-
-// ----------------------------------------------------------------------
-
-#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
-static void ConstructFilename(const char* spec, pid_t pid,
-                              char* buf, int buf_size) {
-  CHECK_LT(snprintf(buf, buf_size,
-                    spec,
-                    static_cast<int>(pid ? pid : getpid())), buf_size);
-}
-#endif
-
-// A templatized helper function instantiated for Mach (OS X) only.
-// It can handle finding info for both 32 bits and 64 bits.
-// Returns true if it successfully handled the hdr, false else.
-#ifdef __MACH__          // Mac OS X, almost certainly
-template<uint32_t kMagic, uint32_t kLCSegment,
-         typename MachHeader, typename SegmentCommand>
-static bool NextExtMachHelper(const mach_header* hdr,
-                              int current_image, int current_load_cmd,
-                              uint64 *start, uint64 *end, char **flags,
-                              uint64 *offset, int64 *inode, char **filename,
-                              uint64 *file_mapping, uint64 *file_pages,
-                              uint64 *anon_mapping, uint64 *anon_pages,
-                              dev_t *dev) {
-  static char kDefaultPerms[5] = "r-xp";
-  if (hdr->magic != kMagic)
-    return false;
-  const char* lc = (const char *)hdr + sizeof(MachHeader);
-  // TODO(csilvers): make this not-quadradic (increment and hold state)
-  for (int j = 0; j < current_load_cmd; j++)  // advance to *our* load_cmd
-    lc += ((const load_command *)lc)->cmdsize;
-  if (((const load_command *)lc)->cmd == kLCSegment) {
-    const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
-    const SegmentCommand* sc = (const SegmentCommand *)lc;
-    if (start) *start = sc->vmaddr + dlloff;
-    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
-    if (flags) *flags = kDefaultPerms;  // can we do better?
-    if (offset) *offset = sc->fileoff;
-    if (inode) *inode = 0;
-    if (filename)
-      *filename = const_cast<char*>(_dyld_get_image_name(current_image));
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;   // could we use sc->filesize?
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-
-  return false;
-}
-#endif
-
-// Finds |c| in |text|, and assign '\0' at the found position.
-// The original character at the modified position should be |c|.
-// A pointer to the modified position is stored in |endptr|.
-// |endptr| should not be NULL.
-static bool ExtractUntilChar(char *text, int c, char **endptr) {
-  CHECK_NE(text, NULL);
-  CHECK_NE(endptr, NULL);
-  char *found;
-  found = strchr(text, c);
-  if (found == NULL) {
-    *endptr = NULL;
-    return false;
-  }
-
-  *endptr = found;
-  *found = '\0';
-  return true;
-}
-
-// Increments |*text_pointer| while it points a whitespace character.
-// It is to follow sscanf's whilespace handling.
-static void SkipWhileWhitespace(char **text_pointer, int c) {
-  if (isspace(c)) {
-    while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) {
-      ++(*text_pointer);
-    }
-  }
-}
-
-template<class T>
-static T StringToInteger(char *text, char **endptr, int base) {
-  assert(false);
-  return T();
-}
-
-template<>
-int StringToInteger<int>(char *text, char **endptr, int base) {
-  return strtol(text, endptr, base);
-}
-
-template<>
-int64 StringToInteger<int64>(char *text, char **endptr, int base) {
-  return strtoll(text, endptr, base);
-}
-
-template<>
-uint64 StringToInteger<uint64>(char *text, char **endptr, int base) {
-  return strtoull(text, endptr, base);
-}
-
-template<typename T>
-static T StringToIntegerUntilChar(
-    char *text, int base, int c, char **endptr_result) {
-  CHECK_NE(endptr_result, NULL);
-  *endptr_result = NULL;
-
-  char *endptr_extract;
-  if (!ExtractUntilChar(text, c, &endptr_extract))
-    return 0;
-
-  T result;
-  char *endptr_strto;
-  result = StringToInteger<T>(text, &endptr_strto, base);
-  *endptr_extract = c;
-
-  if (endptr_extract != endptr_strto)
-    return 0;
-
-  *endptr_result = endptr_extract;
-  SkipWhileWhitespace(endptr_result, c);
-
-  return result;
-}
-
-static char *CopyStringUntilChar(
-    char *text, unsigned out_len, int c, char *out) {
-  char *endptr;
-  if (!ExtractUntilChar(text, c, &endptr))
-    return NULL;
-
-  strncpy(out, text, out_len);
-  out[out_len-1] = '\0';
-  *endptr = c;
-
-  SkipWhileWhitespace(&endptr, c);
-  return endptr;
-}
-
-template<typename T>
-static bool StringToIntegerUntilCharWithCheck(
-    T *outptr, char *text, int base, int c, char **endptr) {
-  *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr);
-  if (*endptr == NULL || **endptr == '\0') return false;
-  ++(*endptr);
-  return true;
-}
-
-static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end,
-                              char *flags, uint64 *offset,
-                              int *major, int *minor, int64 *inode,
-                              unsigned *filename_offset) {
-#if defined(__linux__)
-  /*
-   * It's similar to:
-   * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
-   *        start, end, flags, offset, major, minor, inode, filename_offset)
-   */
-  char *endptr = text;
-  if (endptr == NULL || *endptr == '\0')  return false;
-
-  if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr))
-    return false;
-
-  endptr = CopyStringUntilChar(endptr, 5, ' ', flags);
-  if (endptr == NULL || *endptr == '\0')  return false;
-  ++endptr;
-
-  if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr))
-    return false;
-
-  if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr))
-    return false;
-
-  *filename_offset = (endptr - text);
-  return true;
-#else
-  return false;
-#endif
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid) {
-  Init(pid, NULL, false);
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) {
-  Init(pid, buffer, false);
-}
-
-ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer,
-                                   bool use_maps_backing) {
-  Init(pid, buffer, use_maps_backing);
-}
-
-void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
-                            bool use_maps_backing) {
-  pid_ = pid;
-  using_maps_backing_ = use_maps_backing;
-  dynamic_buffer_ = NULL;
-  if (!buffer) {
-    // If the user didn't pass in any buffer storage, allocate it
-    // now. This is the normal case; the signal handler passes in a
-    // static buffer.
-    buffer = dynamic_buffer_ = new Buffer;
-  } else {
-    dynamic_buffer_ = NULL;
-  }
-
-  ibuf_ = buffer->buf_;
-
-  stext_ = etext_ = nextline_ = ibuf_;
-  ebuf_ = ibuf_ + Buffer::kBufSize - 1;
-  nextline_ = ibuf_;
-
-#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-  if (use_maps_backing) {  // don't bother with clever "self" stuff in this case
-    ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize);
-  } else if (pid == 0) {
-    // We have to kludge a bit to deal with the args ConstructFilename
-    // expects.  The 1 is never used -- it's only impt. that it's not 0.
-    ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize);
-  } else {
-    ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize);
-  }
-  // No error logging since this can be called from the crash dump
-  // handler at awkward moments. Users should call Valid() before
-  // using.
-  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
-#elif defined(__FreeBSD__)
-  // We don't support maps_backing on freebsd
-  if (pid == 0) {
-    ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize);
-  } else {
-    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
-  }
-  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
-#elif defined(__sun__)
-  if (pid == 0) {
-    ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize);
-  } else {
-    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
-  }
-  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
-#elif defined(__MACH__)
-  current_image_ = _dyld_image_count();   // count down from the top
-  current_load_cmd_ = -1;
-#elif defined(PLATFORM_WINDOWS)
-  snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE |
-                                       TH32CS_SNAPMODULE32,
-                                       GetCurrentProcessId());
-  memset(&module_, 0, sizeof(module_));
-#else
-  fd_ = -1;   // so Valid() is always false
-#endif
-
-}
-
-ProcMapsIterator::~ProcMapsIterator() {
-#if defined(PLATFORM_WINDOWS)
-  if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_);
-#elif defined(__MACH__)
-  // no cleanup necessary!
-#else
-  if (fd_ >= 0) NO_INTR(close(fd_));
-#endif
-  delete dynamic_buffer_;
-}
-
-bool ProcMapsIterator::Valid() const {
-#if defined(PLATFORM_WINDOWS)
-  return snapshot_ != INVALID_HANDLE_VALUE;
-#elif defined(__MACH__)
-  return 1;
-#else
-  return fd_ != -1;
-#endif
-}
-
-bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags,
-                            uint64 *offset, int64 *inode, char **filename) {
-  return NextExt(start, end, flags, offset, inode, filename, NULL, NULL,
-                 NULL, NULL, NULL);
-}
-
-// This has too many arguments.  It should really be building
-// a map object and returning it.  The problem is that this is called
-// when the memory allocator state is undefined, hence the arguments.
-bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
-                               uint64 *offset, int64 *inode, char **filename,
-                               uint64 *file_mapping, uint64 *file_pages,
-                               uint64 *anon_mapping, uint64 *anon_pages,
-                               dev_t *dev) {
-
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
-  do {
-    // Advance to the start of the next line
-    stext_ = nextline_;
-
-    // See if we have a complete line in the buffer already
-    nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_));
-    if (!nextline_) {
-      // Shift/fill the buffer so we do have a line
-      int count = etext_ - stext_;
-
-      // Move the current text to the start of the buffer
-      memmove(ibuf_, stext_, count);
-      stext_ = ibuf_;
-      etext_ = ibuf_ + count;
-
-      int nread = 0;            // fill up buffer with text
-      while (etext_ < ebuf_) {
-        NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_));
-        if (nread > 0)
-          etext_ += nread;
-        else
-          break;
-      }
-
-      // Zero out remaining characters in buffer at EOF to avoid returning
-      // garbage from subsequent calls.
-      if (etext_ != ebuf_ && nread == 0) {
-        memset(etext_, 0, ebuf_ - etext_);
-      }
-      *etext_ = '\n';   // sentinel; safe because ibuf extends 1 char beyond ebuf
-      nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_));
-    }
-    *nextline_ = 0;                // turn newline into nul
-    nextline_ += ((nextline_ < etext_)? 1 : 0);  // skip nul if not end of text
-    // stext_ now points at a nul-terminated line
-    uint64 tmpstart, tmpend, tmpoffset;
-    int64 tmpinode;
-    int major, minor;
-    unsigned filename_offset = 0;
-#if defined(__linux__)
-    // for now, assume all linuxes have the same format
-    if (!ParseProcMapsLine(
-        stext_,
-        start ? start : &tmpstart,
-        end ? end : &tmpend,
-        flags_,
-        offset ? offset : &tmpoffset,
-        &major, &minor,
-        inode ? inode : &tmpinode, &filename_offset)) continue;
-#elif defined(__CYGWIN__) || defined(__CYGWIN32__)
-    // cygwin is like linux, except the third field is the "entry point"
-    // rather than the offset (see format_process_maps at
-    // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
-    // Offset is always be 0 on cygwin: cygwin implements an mmap
-    // by loading the whole file and then calling NtMapViewOfSection.
-    // Cygwin also seems to set its flags kinda randomly; use windows default.
-    char tmpflags[5];
-    if (offset)
-      *offset = 0;
-    strcpy(flags_, "r-xp");
-    if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
-               start ? start : &tmpstart,
-               end ? end : &tmpend,
-               tmpflags,
-               &tmpoffset,
-               &major, &minor,
-               inode ? inode : &tmpinode, &filename_offset) != 7) continue;
-#elif defined(__FreeBSD__)
-    // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
-    tmpstart = tmpend = tmpoffset = 0;
-    tmpinode = 0;
-    major = minor = 0;   // can't get this info in freebsd
-    if (inode)
-      *inode = 0;        // nor this
-    if (offset)
-      *offset = 0;       // seems like this should be in there, but maybe not
-    // start end resident privateresident obj(?) prot refcnt shadowcnt
-    // flags copy_on_write needs_copy type filename:
-    // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
-    if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
-               start ? start : &tmpstart,
-               end ? end : &tmpend,
-               flags_,
-               &filename_offset) != 3) continue;
-#endif
-
-    // Depending on the Linux kernel being used, there may or may not be a space
-    // after the inode if there is no filename.  sscanf will in such situations
-    // nondeterministically either fill in filename_offset or not (the results
-    // differ on multiple calls in the same run even with identical arguments).
-    // We don't want to wander off somewhere beyond the end of the string.
-    size_t stext_length = strlen(stext_);
-    if (filename_offset == 0 || filename_offset > stext_length)
-      filename_offset = stext_length;
-
-    // We found an entry
-    if (flags) *flags = flags_;
-    if (filename) *filename = stext_ + filename_offset;
-    if (dev) *dev = minor | (major << 8);
-
-    if (using_maps_backing_) {
-      // Extract and parse physical page backing info.
-      char *backing_ptr = stext_ + filename_offset +
-          strlen(stext_+filename_offset);
-
-      // find the second '('
-      int paren_count = 0;
-      while (--backing_ptr > stext_) {
-        if (*backing_ptr == '(') {
-          ++paren_count;
-          if (paren_count >= 2) {
-            uint64 tmp_file_mapping;
-            uint64 tmp_file_pages;
-            uint64 tmp_anon_mapping;
-            uint64 tmp_anon_pages;
-
-            sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")",
-                   file_mapping ? file_mapping : &tmp_file_mapping,
-                   file_pages ? file_pages : &tmp_file_pages,
-                   anon_mapping ? anon_mapping : &tmp_anon_mapping,
-                   anon_pages ? anon_pages : &tmp_anon_pages);
-            // null terminate the file name (there is a space
-            // before the first (.
-            backing_ptr[-1] = 0;
-            break;
-          }
-        }
-      }
-    }
-
-    return true;
-  } while (etext_ > ibuf_);
-#elif defined(__sun__)
-  // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1
-  static char kPerms[8][4] = { "---", "--x", "-w-", "-wx",
-                               "r--", "r-x", "rw-", "rwx" };
-  COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
-  COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
-  COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
-  Buffer object_path;
-  int nread = 0;            // fill up buffer with text
-  NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
-  if (nread == sizeof(prmap_t)) {
-    long inode_from_mapname = 0;
-    prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_);
-    // Best-effort attempt to get the inode from the filename.  I think the
-    // two middle ints are major and minor device numbers, but I'm not sure.
-    sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
-
-    if (pid_ == 0) {
-      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
-                        "/proc/self/path/%s", mapinfo->pr_mapname),
-               Buffer::kBufSize);
-    } else {
-      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
-                        "/proc/%d/path/%s",
-                        static_cast<int>(pid_), mapinfo->pr_mapname),
-               Buffer::kBufSize);
-    }
-    ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
-    CHECK_LT(len, PATH_MAX);
-    if (len < 0)
-      len = 0;
-    current_filename_[len] = '\0';
-
-    if (start) *start = mapinfo->pr_vaddr;
-    if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
-    if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
-    if (offset) *offset = mapinfo->pr_offset;
-    if (inode) *inode = inode_from_mapname;
-    if (filename) *filename = current_filename_;
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-#elif defined(__MACH__)
-  // We return a separate entry for each segment in the DLL. (TODO(csilvers):
-  // can we do better?)  A DLL ("image") has load-commands, some of which
-  // talk about segment boundaries.
-  // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912
-  for (; current_image_ >= 0; current_image_--) {
-    const mach_header* hdr = _dyld_get_image_header(current_image_);
-    if (!hdr) continue;
-    if (current_load_cmd_ < 0)   // set up for this image
-      current_load_cmd_ = hdr->ncmds;  // again, go from the top down
-
-    // We start with the next load command (we've already looked at this one).
-    for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
-#ifdef MH_MAGIC_64
-      if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
-                            struct mach_header_64, struct segment_command_64>(
-                                hdr, current_image_, current_load_cmd_,
-                                start, end, flags, offset, inode, filename,
-                                file_mapping, file_pages, anon_mapping,
-                                anon_pages, dev)) {
-        return true;
-      }
-#endif
-      if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
-                            struct mach_header, struct segment_command>(
-                                hdr, current_image_, current_load_cmd_,
-                                start, end, flags, offset, inode, filename,
-                                file_mapping, file_pages, anon_mapping,
-                                anon_pages, dev)) {
-        return true;
-      }
-    }
-    // If we get here, no more load_cmd's in this image talk about
-    // segments.  Go on to the next image.
-  }
-#elif defined(PLATFORM_WINDOWS)
-  static char kDefaultPerms[5] = "r-xp";
-  BOOL ok;
-  if (module_.dwSize == 0) {  // only possible before first call
-    module_.dwSize = sizeof(module_);
-    ok = Module32First(snapshot_, &module_);
-  } else {
-    ok = Module32Next(snapshot_, &module_);
-  }
-  if (ok) {
-    uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr);
-    if (start) *start = base_addr;
-    if (end) *end = base_addr + module_.modBaseSize;
-    if (flags) *flags = kDefaultPerms;
-    if (offset) *offset = 0;
-    if (inode) *inode = 0;
-    if (filename) *filename = module_.szExePath;
-    if (file_mapping) *file_mapping = 0;
-    if (file_pages) *file_pages = 0;
-    if (anon_mapping) *anon_mapping = 0;
-    if (anon_pages) *anon_pages = 0;
-    if (dev) *dev = 0;
-    return true;
-  }
-#endif
-
-  // We didn't find anything
-  return false;
-}
-
-int ProcMapsIterator::FormatLine(char* buffer, int bufsize,
-                                 uint64 start, uint64 end, const char *flags,
-                                 uint64 offset, int64 inode,
-                                 const char *filename, dev_t dev) {
-  // We assume 'flags' looks like 'rwxp' or 'rwx'.
-  char r = (flags && flags[0] == 'r') ? 'r' : '-';
-  char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-';
-  char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-';
-  // p always seems set on linux, so we set the default to 'p', not '-'
-  char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p')
-      ? '-' : 'p';
-
-  const int rc = snprintf(buffer, bufsize,
-                          "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n",
-                          start, end, r,w,x,p, offset,
-                          static_cast<int>(dev/256), static_cast<int>(dev%256),
-                          inode, filename);
-  return (rc < 0 || rc >= bufsize) ? 0 : rc;
-}
-
-namespace tcmalloc {
-
-// Helper to add the list of mapped shared libraries to a profile.
-// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
-// and return the actual size occupied in 'buf'.  We fill wrote_all to true
-// if we successfully wrote all proc lines to buf, false else.
-// We do not provision for 0-terminating 'buf'.
-int FillProcSelfMaps(char buf[], int size, bool* wrote_all) {
-  ProcMapsIterator::Buffer iterbuf;
-  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
-
-  uint64 start, end, offset;
-  int64 inode;
-  char *flags, *filename;
-  int bytes_written = 0;
-  *wrote_all = true;
-  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
-    const int line_length = it.FormatLine(buf + bytes_written,
-                                          size - bytes_written,
-                                          start, end, flags, offset,
-                                          inode, filename, 0);
-    if (line_length == 0)
-      *wrote_all = false;     // failed to write this line out
-    else
-      bytes_written += line_length;
-
-  }
-  return bytes_written;
-}
-
-// Dump the same data as FillProcSelfMaps reads to fd.
-// It seems easier to repeat parts of FillProcSelfMaps here than to
-// reuse it via a call.
-void DumpProcSelfMaps(RawFD fd) {
-  ProcMapsIterator::Buffer iterbuf;
-  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
-
-  uint64 start, end, offset;
-  int64 inode;
-  char *flags, *filename;
-  ProcMapsIterator::Buffer linebuf;
-  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
-    int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_),
-                                start, end, flags, offset, inode, filename,
-                                0);
-    RawWrite(fd, linebuf.buf_, written);
-  }
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/base/sysinfo.h b/contrib/libtcmalloc/src/base/sysinfo.h
deleted file mode 100644
index 75b101376c5..00000000000
--- a/contrib/libtcmalloc/src/base/sysinfo.h
+++ /dev/null
@@ -1,232 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// All functions here are thread-hostile due to file caching unless
-// commented otherwise.
-
-#ifndef _SYSINFO_H_
-#define _SYSINFO_H_
-
-#include "../config.h"
-
-#include <time.h>
-#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
-#include <windows.h>   // for DWORD
-#include <tlhelp32.h>  // for CreateToolhelp32Snapshot
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>    // for pid_t
-#endif
-#include <stddef.h>    // for size_t
-#include <limits.h>    // for PATH_MAX
-#include "base/basictypes.h"
-#include "base/logging.h"   // for RawFD
-
-// This getenv function is safe to call before the C runtime is initialized.
-// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
-// /proc/self/environ instead calling getenv().  It's intended to be used in
-// routines that run before main(), when the state required for getenv() may
-// not be set up yet.  In particular, errno isn't set up until relatively late
-// (after the pthreads library has a chance to make it threadsafe), and
-// getenv() doesn't work until then. 
-// On some platforms, this call will utilize the same, static buffer for
-// repeated GetenvBeforeMain() calls. Callers should not expect pointers from
-// this routine to be long lived.
-// Note that on unix, /proc only has the environment at the time the
-// application was started, so this routine ignores setenv() calls/etc.  Also
-// note it only reads the first 16K of the environment.
-extern const char* GetenvBeforeMain(const char* name);
-
-// This takes as an argument an environment-variable name (like
-// CPUPROFILE) whose value is supposed to be a file-path, and sets
-// path to that path, and returns true.  Non-trivial for surprising
-// reasons, as documented in sysinfo.cc.  path must have space PATH_MAX.
-extern bool GetUniquePathFromEnv(const char* env_name, char* path);
-
-extern int GetSystemCPUsCount();
-
-void SleepForMilliseconds(int milliseconds);
-
-//  Return true if we're running POSIX (e.g., NPTL on Linux) threads,
-//  as opposed to a non-POSIX thread library.  The thing that we care
-//  about is whether a thread's pid is the same as the thread that
-//  spawned it.  If so, this function returns true.
-//  Thread-safe.
-//  Note: We consider false negatives to be OK.
-bool HasPosixThreads();
-
-#ifndef SWIG  // SWIG doesn't like struct Buffer and variable arguments.
-
-// A ProcMapsIterator abstracts access to /proc/maps for a given
-// process. Needs to be stack-allocatable and avoid using stdio/malloc
-// so it can be used in the google stack dumper, heap-profiler, etc.
-//
-// On Windows and Mac OS X, this iterator iterates *only* over DLLs
-// mapped into this process space.  For Linux, FreeBSD, and Solaris,
-// it iterates over *all* mapped memory regions, including anonymous
-// mmaps.  For other O/Ss, it is unlikely to work at all, and Valid()
-// will always return false.  Also note: this routine only works on
-// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab:
-//    proc            /proc   procfs  rw 0 0
-class ProcMapsIterator {
- public:
-  struct Buffer {
-#ifdef __FreeBSD__
-    // FreeBSD requires us to read all of the maps file at once, so
-    // we have to make a buffer that's "always" big enough
-    static const size_t kBufSize = 102400;
-#else   // a one-line buffer is good enough
-    static const size_t kBufSize = PATH_MAX + 1024;
-#endif
-    char buf_[kBufSize];
-  };
-
-
-  // Create a new iterator for the specified pid.  pid can be 0 for "self".
-  explicit ProcMapsIterator(pid_t pid);
-
-  // Create an iterator with specified storage (for use in signal
-  // handler). "buffer" should point to a ProcMapsIterator::Buffer
-  // buffer can be NULL in which case a bufer will be allocated.
-  ProcMapsIterator(pid_t pid, Buffer *buffer);
-
-  // Iterate through maps_backing instead of maps if use_maps_backing
-  // is true.  Otherwise the same as above.  buffer can be NULL and
-  // it will allocate a buffer itself.
-  ProcMapsIterator(pid_t pid, Buffer *buffer,
-                   bool use_maps_backing);
-
-  // Returns true if the iterator successfully initialized;
-  bool Valid() const;
-
-  // Returns a pointer to the most recently parsed line. Only valid
-  // after Next() returns true, and until the iterator is destroyed or
-  // Next() is called again.  This may give strange results on non-Linux
-  // systems.  Prefer FormatLine() if that may be a concern.
-  const char *CurrentLine() const { return stext_; }
-
-  // Writes the "canonical" form of the /proc/xxx/maps info for a single
-  // line to the passed-in buffer. Returns the number of bytes written,
-  // or 0 if it was not able to write the complete line.  (To guarantee
-  // success, buffer should have size at least Buffer::kBufSize.)
-  // Takes as arguments values set via a call to Next().  The
-  // "canonical" form of the line (taken from linux's /proc/xxx/maps):
-  //    <start_addr(hex)>-<end_addr(hex)> <perms(rwxp)> <offset(hex)>   +
-  //    <major_dev(hex)>:<minor_dev(hex)> <inode> <filename> Note: the
-  // eg
-  //    08048000-0804c000 r-xp 00000000 03:01 3793678    /bin/cat
-  // If you don't have the dev_t (dev), feel free to pass in 0.
-  // (Next() doesn't return a dev_t, though NextExt does.)
-  //
-  // Note: if filename and flags were obtained via a call to Next(),
-  // then the output of this function is only valid if Next() returned
-  // true, and only until the iterator is destroyed or Next() is
-  // called again.  (Since filename, at least, points into CurrentLine.)
-  static int FormatLine(char* buffer, int bufsize,
-                        uint64 start, uint64 end, const char *flags,
-                        uint64 offset, int64 inode, const char *filename,
-                        dev_t dev);
-
-  // Find the next entry in /proc/maps; return true if found or false
-  // if at the end of the file.
-  //
-  // Any of the result pointers can be NULL if you're not interested
-  // in those values.
-  //
-  // If "flags" and "filename" are passed, they end up pointing to
-  // storage within the ProcMapsIterator that is valid only until the
-  // iterator is destroyed or Next() is called again. The caller may
-  // modify the contents of these strings (up as far as the first NUL,
-  // and only until the subsequent call to Next()) if desired.
-
-  // The offsets are all uint64 in order to handle the case of a
-  // 32-bit process running on a 64-bit kernel
-  //
-  // IMPORTANT NOTE: see top-of-class notes for details about what
-  // mapped regions Next() iterates over, depending on O/S.
-  // TODO(csilvers): make flags and filename const.
-  bool Next(uint64 *start, uint64 *end, char **flags,
-            uint64 *offset, int64 *inode, char **filename);
-
-  bool NextExt(uint64 *start, uint64 *end, char **flags,
-               uint64 *offset, int64 *inode, char **filename,
-               uint64 *file_mapping, uint64 *file_pages,
-               uint64 *anon_mapping, uint64 *anon_pages,
-               dev_t *dev);
-
-  ~ProcMapsIterator();
-
- private:
-  void Init(pid_t pid, Buffer *buffer, bool use_maps_backing);
-
-  char *ibuf_;        // input buffer
-  char *stext_;       // start of text
-  char *etext_;       // end of text
-  char *nextline_;    // start of next line
-  char *ebuf_;        // end of buffer (1 char for a nul)
-#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
-  HANDLE snapshot_;   // filehandle on dll info
-  // In a change from the usual W-A pattern, there is no A variant of
-  // MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
-  // We want the original A variants, and this #undef is the only
-  // way I see to get them.  Redefining it when we're done prevents us
-  // from affecting other .cc files.
-# ifdef MODULEENTRY32  // Alias of W
-#   undef MODULEENTRY32
-  MODULEENTRY32 module_;   // info about current dll (and dll iterator)
-#   define MODULEENTRY32 MODULEENTRY32W
-# else  // It's the ascii, the one we want.
-  MODULEENTRY32 module_;   // info about current dll (and dll iterator)
-# endif
-#elif defined(__MACH__)
-  int current_image_; // dll's are called "images" in macos parlance
-  int current_load_cmd_;   // the segment of this dll we're examining
-#elif defined(__sun__)     // Solaris
-  int fd_;
-  char current_filename_[PATH_MAX];
-#else
-  int fd_;            // filehandle on /proc/*/maps
-#endif
-  pid_t pid_;
-  char flags_[10];
-  Buffer* dynamic_buffer_;  // dynamically-allocated Buffer
-  bool using_maps_backing_; // true if we are looking at maps_backing instead of maps.
-};
-
-#endif  /* #ifndef SWIG */
-
-// Helper routines
-
-namespace tcmalloc {
-int FillProcSelfMaps(char buf[], int size, bool* wrote_all);
-void DumpProcSelfMaps(RawFD fd);
-}
-
-#endif   /* #ifndef _SYSINFO_H_ */
diff --git a/contrib/libtcmalloc/src/base/thread_annotations.h b/contrib/libtcmalloc/src/base/thread_annotations.h
deleted file mode 100644
index f57b2999ee7..00000000000
--- a/contrib/libtcmalloc/src/base/thread_annotations.h
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Le-Chun Wu
-//
-// This header file contains the macro definitions for thread safety
-// annotations that allow the developers to document the locking policies
-// of their multi-threaded code. The annotations can also help program
-// analysis tools to identify potential thread safety issues.
-//
-// The annotations are implemented using GCC's "attributes" extension.
-// Using the macros defined here instead of the raw GCC attributes allows
-// for portability and future compatibility.
-//
-// This functionality is not yet fully implemented in perftools,
-// but may be one day.
-
-#ifndef BASE_THREAD_ANNOTATIONS_H_
-#define BASE_THREAD_ANNOTATIONS_H_
-
-
-#if defined(__GNUC__) \
-  && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \
-  && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG))
-#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
-#else
-#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
-#endif
-
-
-// Document if a shared variable/field needs to be protected by a lock.
-// GUARDED_BY allows the user to specify a particular lock that should be
-// held when accessing the annotated variable, while GUARDED_VAR only
-// indicates a shared variable should be guarded (by any lock). GUARDED_VAR
-// is primarily used when the client cannot express the name of the lock.
-#define GUARDED_BY(x)          THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
-#define GUARDED_VAR            THREAD_ANNOTATION_ATTRIBUTE__(guarded)
-
-// Document if the memory location pointed to by a pointer should be guarded
-// by a lock when dereferencing the pointer. Similar to GUARDED_VAR,
-// PT_GUARDED_VAR is primarily used when the client cannot express the name
-// of the lock. Note that a pointer variable to a shared memory location
-// could itself be a shared variable. For example, if a shared global pointer
-// q, which is guarded by mu1, points to a shared memory location that is
-// guarded by mu2, q should be annotated as follows:
-//     int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2);
-#define PT_GUARDED_BY(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x))
-#define PT_GUARDED_VAR \
-  THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded)
-
-// Document the acquisition order between locks that can be held
-// simultaneously by a thread. For any two locks that need to be annotated
-// to establish an acquisition order, only one of them needs the annotation.
-// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER
-// and ACQUIRED_BEFORE.)
-#define ACQUIRED_AFTER(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x))
-#define ACQUIRED_BEFORE(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x))
-
-// The following three annotations document the lock requirements for
-// functions/methods.
-
-// Document if a function expects certain locks to be held before it is called
-#define EXCLUSIVE_LOCKS_REQUIRED(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x))
-
-#define SHARED_LOCKS_REQUIRED(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x))
-
-// Document the locks acquired in the body of the function. These locks
-// cannot be held when calling this function (as google3's Mutex locks are
-// non-reentrant).
-#define LOCKS_EXCLUDED(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x))
-
-// Document the lock the annotated function returns without acquiring it.
-#define LOCK_RETURNED(x)       THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
-
-// Document if a class/type is a lockable type (such as the Mutex class).
-#define LOCKABLE               THREAD_ANNOTATION_ATTRIBUTE__(lockable)
-
-// Document if a class is a scoped lockable type (such as the MutexLock class).
-#define SCOPED_LOCKABLE        THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
-
-// The following annotations specify lock and unlock primitives.
-#define EXCLUSIVE_LOCK_FUNCTION(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x))
-
-#define SHARED_LOCK_FUNCTION(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x))
-
-#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x))
-
-#define SHARED_TRYLOCK_FUNCTION(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x))
-
-#define UNLOCK_FUNCTION(x) \
-  THREAD_ANNOTATION_ATTRIBUTE__(unlock(x))
-
-// An escape hatch for thread safety analysis to ignore the annotated function.
-#define NO_THREAD_SAFETY_ANALYSIS \
-  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
-
-#endif  // BASE_THREAD_ANNOTATIONS_H_
diff --git a/contrib/libtcmalloc/src/base/thread_lister.c b/contrib/libtcmalloc/src/base/thread_lister.c
deleted file mode 100644
index 9dc8d721892..00000000000
--- a/contrib/libtcmalloc/src/base/thread_lister.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Copyright (c) 2005-2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#include "config.h"
-
-#include "base/thread_lister.h"
-
-#include <stdio.h>         /* needed for NULL on some powerpc platforms (?!) */
-#include <sys/types.h>
-#include <unistd.h>        /* for getpid */
-
-#ifdef HAVE_SYS_PRCTL
-# include <sys/prctl.h>
-#endif
-
-#include "base/linuxthreads.h"
-/* Include other thread listers here that define THREADS macro
- * only when they can provide a good implementation.
- */
-
-#ifndef THREADS
-
-/* Default trivial thread lister for single-threaded applications,
- * or if the multi-threading code has not been ported, yet.
- */
-
-int TCMalloc_ListAllProcessThreads(void *parameter,
-				   ListAllProcessThreadsCallBack callback, ...) {
-  int rc;
-  va_list ap;
-  pid_t pid;
-
-#ifdef HAVE_SYS_PRCTL
-  int dumpable = prctl(PR_GET_DUMPABLE, 0);
-  if (!dumpable)
-    prctl(PR_SET_DUMPABLE, 1);
-#endif
-  va_start(ap, callback);
-  pid = getpid();
-  rc = callback(parameter, 1, &pid, ap);
-  va_end(ap);
-#ifdef HAVE_SYS_PRCTL
-  if (!dumpable)
-    prctl(PR_SET_DUMPABLE, 0);
-#endif
-  return rc;
-}
-
-int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
-  return 1;
-}
-
-#endif   /* ifndef THREADS */
diff --git a/contrib/libtcmalloc/src/base/thread_lister.h b/contrib/libtcmalloc/src/base/thread_lister.h
deleted file mode 100644
index 6e70b89fef5..00000000000
--- a/contrib/libtcmalloc/src/base/thread_lister.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
-/* Copyright (c) 2005-2007, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#ifndef _THREAD_LISTER_H
-#define _THREAD_LISTER_H
-
-#include <stdarg.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef int (*ListAllProcessThreadsCallBack)(void *parameter,
-                                             int num_threads,
-                                             pid_t *thread_pids,
-                                             va_list ap);
-
-/* This function gets the list of all linux threads of the current process
- * passes them to the 'callback' along with the 'parameter' pointer; at the
- * call back call time all the threads are paused via
- * PTRACE_ATTACH.
- * The callback is executed from a separate thread which shares only the
- * address space, the filesystem, and the filehandles with the caller. Most
- * notably, it does not share the same pid and ppid; and if it terminates,
- * the rest of the application is still there. 'callback' is supposed to do
- * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if
- * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous
- * signals are blocked. If the 'callback' decides to unblock them, it must
- * ensure that they cannot terminate the application, or that
- * TCMalloc_ResumeAllProcessThreads will get called.
- * It is an error for the 'callback' to make any library calls that could
- * acquire locks. Most notably, this means that most system calls have to
- * avoid going through libc. Also, this means that it is not legal to call
- * exit() or abort().
- * We return -1 on error and the return value of 'callback' on success.
- */
-int TCMalloc_ListAllProcessThreads(void *parameter,
-                                   ListAllProcessThreadsCallBack callback, ...);
-
-/* This function resumes the list of all linux threads that
- * TCMalloc_ListAllProcessThreads pauses before giving to its
- * callback.  The function returns non-zero if at least one thread was
- * suspended and has now been resumed.
- */
-int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* _THREAD_LISTER_H */
diff --git a/contrib/libtcmalloc/src/base/vdso_support.cc b/contrib/libtcmalloc/src/base/vdso_support.cc
deleted file mode 100644
index 730df3011d6..00000000000
--- a/contrib/libtcmalloc/src/base/vdso_support.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in the kernel VDSO page.
-//
-// VDSOSupport -- a class representing kernel VDSO (if present).
-//
-
-#include "base/vdso_support.h"
-
-#ifdef HAVE_VDSO_SUPPORT     // defined in vdso_support.h
-
-#include <fcntl.h>
-#include <stddef.h>   // for ptrdiff_t
-
-#include "base/atomicops.h"  // for MemoryBarrier
-#include "base/linux_syscall_support.h"
-#include "base/logging.h"
-#include "base/dynamic_annotations.h"
-#include "base/basictypes.h"  // for COMPILE_ASSERT
-
-using base::subtle::MemoryBarrier;
-
-#ifndef AT_SYSINFO_EHDR
-#define AT_SYSINFO_EHDR 33
-#endif
-
-namespace base {
-
-const void *VDSOSupport::vdso_base_ = ElfMemImage::kInvalidBase;
-VDSOSupport::VDSOSupport()
-    // If vdso_base_ is still set to kInvalidBase, we got here
-    // before VDSOSupport::Init has been called. Call it now.
-    : image_(vdso_base_ == ElfMemImage::kInvalidBase ? Init() : vdso_base_) {
-}
-
-// NOTE: we can't use GoogleOnceInit() below, because we can be
-// called by tcmalloc, and none of the *once* stuff may be functional yet.
-//
-// In addition, we hope that the VDSOSupportHelper constructor
-// causes this code to run before there are any threads, and before
-// InitGoogle() has executed any chroot or setuid calls.
-//
-// Finally, even if there is a race here, it is harmless, because
-// the operation should be idempotent.
-const void *VDSOSupport::Init() {
-  if (vdso_base_ == ElfMemImage::kInvalidBase) {
-    // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[]
-    // on stack, and so glibc works as if VDSO was not present.
-    // But going directly to kernel via /proc/self/auxv below bypasses
-    // Valgrind zapping. So we check for Valgrind separately.
-    if (RunningOnValgrind()) {
-      vdso_base_ = NULL;
-      return NULL;
-    }
-    int fd = open("/proc/self/auxv", O_RDONLY);
-    if (fd == -1) {
-      // Kernel too old to have a VDSO.
-      vdso_base_ = NULL;
-      return NULL;
-    }
-    ElfW(auxv_t) aux;
-    while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
-      if (aux.a_type == AT_SYSINFO_EHDR) {
-        COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val),
-                       unexpected_sizeof_pointer_NE_sizeof_a_val);
-        vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val);
-        break;
-      }
-    }
-    close(fd);
-    if (vdso_base_ == ElfMemImage::kInvalidBase) {
-      // Didn't find AT_SYSINFO_EHDR in auxv[].
-      vdso_base_ = NULL;
-    }
-  }
-  return vdso_base_;
-}
-
-const void *VDSOSupport::SetBase(const void *base) {
-  CHECK(base != ElfMemImage::kInvalidBase);
-  const void *old_base = vdso_base_;
-  vdso_base_ = base;
-  image_.Init(base);
-  return old_base;
-}
-
-bool VDSOSupport::LookupSymbol(const char *name,
-                               const char *version,
-                               int type,
-                               SymbolInfo *info) const {
-  return image_.LookupSymbol(name, version, type, info);
-}
-
-bool VDSOSupport::LookupSymbolByAddress(const void *address,
-                                        SymbolInfo *info_out) const {
-  return image_.LookupSymbolByAddress(address, info_out);
-}
-
-// We need to make sure VDSOSupport::Init() is called before
-// the main() runs, since it might do something like setuid or
-// chroot.  If VDSOSupport
-// is used in any global constructor, this will happen, since
-// VDSOSupport's constructor calls Init.  But if not, we need to
-// ensure it here, with a global constructor of our own.  This
-// is an allowed exception to the normal rule against non-trivial
-// global constructors.
-static class VDSOInitHelper {
- public:
-  VDSOInitHelper() { VDSOSupport::Init(); }
-} vdso_init_helper;
-}
-
-#endif  // HAVE_VDSO_SUPPORT
diff --git a/contrib/libtcmalloc/src/base/vdso_support.h b/contrib/libtcmalloc/src/base/vdso_support.h
deleted file mode 100644
index 0c2213c7c06..00000000000
--- a/contrib/libtcmalloc/src/base/vdso_support.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in the kernel VDSO page.
-//
-// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
-// executable code, which looks like a shared library, but doesn't
-// necessarily exist anywhere on disk, and which gets mmap()ed into
-// every process by kernels which support VDSO, such as 2.6.x for 32-bit
-// executables, and 2.6.24 and above for 64-bit executables.
-//
-// More details could be found here:
-// http://www.trilithium.com/johan/2005/08/linux-gate/
-//
-// VDSOSupport -- a class representing kernel VDSO (if present).
-//
-// Example usage:
-//  VDSOSupport vdso;
-//  VDSOSupport::SymbolInfo info;
-//  typedef (*FN)(unsigned *, void *, void *);
-//  FN fn = NULL;
-//  if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
-//     fn = reinterpret_cast<FN>(info.address);
-//  }
-
-#ifndef BASE_VDSO_SUPPORT_H_
-#define BASE_VDSO_SUPPORT_H_
-
-#include "../config.h"
-#include "base/basictypes.h"
-#include "base/elf_mem_image.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE
-
-#define HAVE_VDSO_SUPPORT 1
-
-#include <stdlib.h>     // for NULL
-
-namespace base {
-
-// NOTE: this class may be used from within tcmalloc, and can not
-// use any memory allocation routines.
-class VDSOSupport {
- public:
-  VDSOSupport();
-
-  typedef ElfMemImage::SymbolInfo SymbolInfo;
-  typedef ElfMemImage::SymbolIterator SymbolIterator;
-
-  // Answers whether we have a vdso at all.
-  bool IsPresent() const { return image_.IsPresent(); }
-
-  // Allow to iterate over all VDSO symbols.
-  SymbolIterator begin() const { return image_.begin(); }
-  SymbolIterator end() const { return image_.end(); }
-
-  // Look up versioned dynamic symbol in the kernel VDSO.
-  // Returns false if VDSO is not present, or doesn't contain given
-  // symbol/version/type combination.
-  // If info_out != NULL, additional details are filled in.
-  bool LookupSymbol(const char *name, const char *version,
-                    int symbol_type, SymbolInfo *info_out) const;
-
-  // Find info about symbol (if any) which overlaps given address.
-  // Returns true if symbol was found; false if VDSO isn't present
-  // or doesn't have a symbol overlapping given address.
-  // If info_out != NULL, additional details are filled in.
-  bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
-
-  // Used only for testing. Replace real VDSO base with a mock.
-  // Returns previous value of vdso_base_. After you are done testing,
-  // you are expected to call SetBase() with previous value, in order to
-  // reset state to the way it was.
-  const void *SetBase(const void *s);
-
-  // Computes vdso_base_ and returns it. Should be called as early as
-  // possible; before any thread creation, chroot or setuid.
-  static const void *Init();
-
- private:
-  // image_ represents VDSO ELF image in memory.
-  // image_.ehdr_ == NULL implies there is no VDSO.
-  ElfMemImage image_;
-
-  // Cached value of auxv AT_SYSINFO_EHDR, computed once.
-  // This is a tri-state:
-  //   kInvalidBase   => value hasn't been determined yet.
-  //              0   => there is no VDSO.
-  //           else   => vma of VDSO Elf{32,64}_Ehdr.
-  //
-  // When testing with mock VDSO, low bit is set.
-  // The low bit is always available because vdso_base_ is
-  // page-aligned.
-  static const void *vdso_base_;
-
-  DISALLOW_COPY_AND_ASSIGN(VDSOSupport);
-};
-
-}  // namespace base
-
-#endif  // HAVE_ELF_MEM_IMAGE
-
-#endif  // BASE_VDSO_SUPPORT_H_
diff --git a/contrib/libtcmalloc/src/central_freelist.cc b/contrib/libtcmalloc/src/central_freelist.cc
deleted file mode 100644
index 11b190dcfee..00000000000
--- a/contrib/libtcmalloc/src/central_freelist.cc
+++ /dev/null
@@ -1,387 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-#include <algorithm>
-#include "central_freelist.h"
-#include "internal_logging.h"  // for ASSERT, MESSAGE
-#include "linked_list.h"       // for SLL_Next, SLL_Push, etc
-#include "page_heap.h"         // for PageHeap
-#include "static_vars.h"       // for Static
-
-using std::min;
-using std::max;
-
-namespace tcmalloc {
-
-void CentralFreeList::Init(size_t cl) {
-  size_class_ = cl;
-  tcmalloc::DLL_Init(&empty_);
-  tcmalloc::DLL_Init(&nonempty_);
-  num_spans_ = 0;
-  counter_ = 0;
-
-  max_cache_size_ = kMaxNumTransferEntries;
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // Disable the transfer cache for the small footprint case.
-  cache_size_ = 0;
-#else
-  cache_size_ = 16;
-#endif
-  if (cl > 0) {
-    // Limit the maximum size of the cache based on the size class.  If this
-    // is not done, large size class objects will consume a lot of memory if
-    // they just sit in the transfer cache.
-    int32_t bytes = Static::sizemap()->ByteSizeForClass(cl);
-    int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl);
-
-    ASSERT(objs_to_move > 0 && bytes > 0);
-    // Limit each size class cache to at most 1MB of objects or one entry,
-    // whichever is greater. Total transfer cache memory used across all
-    // size classes then can't be greater than approximately
-    // 1MB * kMaxNumTransferEntries.
-    // min and max are in parens to avoid macro-expansion on windows.
-    max_cache_size_ = (min)(max_cache_size_,
-                          (max)(1, (1024 * 1024) / (bytes * objs_to_move)));
-    cache_size_ = (min)(cache_size_, max_cache_size_);
-  }
-  used_slots_ = 0;
-  ASSERT(cache_size_ <= max_cache_size_);
-}
-
-void CentralFreeList::ReleaseListToSpans(void* start) {
-  while (start) {
-    void *next = SLL_Next(start);
-    ReleaseToSpans(start);
-    start = next;
-  }
-}
-
-// MapObjectToSpan should logically be part of ReleaseToSpans.  But
-// this triggers an optimization bug in gcc 4.5.0.  Moving to a
-// separate function, and making sure that function isn't inlined,
-// seems to fix the problem.  It also should be fixed for gcc 4.5.1.
-static
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0
-__attribute__ ((noinline))
-#endif
-Span* MapObjectToSpan(void* object) {
-  const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
-  Span* span = Static::pageheap()->GetDescriptor(p);
-  return span;
-}
-
-void CentralFreeList::ReleaseToSpans(void* object) {
-  Span* span = MapObjectToSpan(object);
-  ASSERT(span != NULL);
-  ASSERT(span->refcount > 0);
-
-  // If span is empty, move it to non-empty list
-  if (span->objects == NULL) {
-    tcmalloc::DLL_Remove(span);
-    tcmalloc::DLL_Prepend(&nonempty_, span);
-    Event(span, 'N', 0);
-  }
-
-  // The following check is expensive, so it is disabled by default
-  if (false) {
-    // Check that object does not occur in list
-    int got = 0;
-    for (void* p = span->objects; p != NULL; p = *((void**) p)) {
-      ASSERT(p != object);
-      got++;
-    }
-    ASSERT(got + span->refcount ==
-           (span->length<<kPageShift) /
-           Static::sizemap()->ByteSizeForClass(span->sizeclass));
-  }
-
-  counter_++;
-  span->refcount--;
-  if (span->refcount == 0) {
-    Event(span, '#', 0);
-    counter_ -= ((span->length<<kPageShift) /
-                 Static::sizemap()->ByteSizeForClass(span->sizeclass));
-    tcmalloc::DLL_Remove(span);
-    --num_spans_;
-
-    // Release central list lock while operating on pageheap
-    lock_.Unlock();
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      Static::pageheap()->Delete(span);
-    }
-    lock_.Lock();
-  } else {
-    *(reinterpret_cast<void**>(object)) = span->objects;
-    span->objects = object;
-  }
-}
-
-bool CentralFreeList::EvictRandomSizeClass(
-    int locked_size_class, bool force) {
-  static int race_counter = 0;
-  int t = race_counter++;  // Updated without a lock, but who cares.
-  if (t >= kNumClasses) {
-    while (t >= kNumClasses) {
-      t -= kNumClasses;
-    }
-    race_counter = t;
-  }
-  ASSERT(t >= 0);
-  ASSERT(t < kNumClasses);
-  if (t == locked_size_class) return false;
-  return Static::central_cache()[t].ShrinkCache(locked_size_class, force);
-}
-
-bool CentralFreeList::MakeCacheSpace() {
-  // Is there room in the cache?
-  if (used_slots_ < cache_size_) return true;
-  // Check if we can expand this cache?
-  if (cache_size_ == max_cache_size_) return false;
-  // Ok, we'll try to grab an entry from some other size class.
-  if (EvictRandomSizeClass(size_class_, false) ||
-      EvictRandomSizeClass(size_class_, true)) {
-    // Succeeded in evicting, we're going to make our cache larger.
-    // However, we may have dropped and re-acquired the lock in
-    // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the
-    // cache_size may have changed.  Therefore, check and verify that it is
-    // still OK to increase the cache_size.
-    if (cache_size_ < max_cache_size_) {
-      cache_size_++;
-      return true;
-    }
-  }
-  return false;
-}
-
-
-namespace {
-class LockInverter {
- private:
-  SpinLock *held_, *temp_;
- public:
-  inline explicit LockInverter(SpinLock* held, SpinLock *temp)
-    : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
-  inline ~LockInverter() { temp_->Unlock(); held_->Lock();  }
-};
-}
-
-// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses
-// LockInverter to release one lock and acquire another in scoped-lock
-// style, which our current annotation/analysis does not support.
-bool CentralFreeList::ShrinkCache(int locked_size_class, bool force)
-    NO_THREAD_SAFETY_ANALYSIS {
-  // Start with a quick check without taking a lock.
-  if (cache_size_ == 0) return false;
-  // We don't evict from a full cache unless we are 'forcing'.
-  if (force == false && used_slots_ == cache_size_) return false;
-
-  // Grab lock, but first release the other lock held by this thread.  We use
-  // the lock inverter to ensure that we never hold two size class locks
-  // concurrently.  That can create a deadlock because there is no well
-  // defined nesting order.
-  LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_);
-  ASSERT(used_slots_ <= cache_size_);
-  ASSERT(0 <= cache_size_);
-  if (cache_size_ == 0) return false;
-  if (used_slots_ == cache_size_) {
-    if (force == false) return false;
-    // ReleaseListToSpans releases the lock, so we have to make all the
-    // updates to the central list before calling it.
-    cache_size_--;
-    used_slots_--;
-    ReleaseListToSpans(tc_slots_[used_slots_].head);
-    return true;
-  }
-  cache_size_--;
-  return true;
-}
-
-void CentralFreeList::InsertRange(void *start, void *end, int N) {
-  SpinLockHolder h(&lock_);
-  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
-    MakeCacheSpace()) {
-    int slot = used_slots_++;
-    ASSERT(slot >=0);
-    ASSERT(slot < max_cache_size_);
-    TCEntry *entry = &tc_slots_[slot];
-    entry->head = start;
-    entry->tail = end;
-    return;
-  }
-  ReleaseListToSpans(start);
-}
-
-int CentralFreeList::RemoveRange(void **start, void **end, int N) {
-  ASSERT(N > 0);
-  lock_.Lock();
-  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
-      used_slots_ > 0) {
-    int slot = --used_slots_;
-    ASSERT(slot >= 0);
-    TCEntry *entry = &tc_slots_[slot];
-    *start = entry->head;
-    *end = entry->tail;
-    lock_.Unlock();
-    return N;
-  }
-
-  int result = 0;
-  *start = NULL;
-  *end = NULL;
-  // TODO: Prefetch multiple TCEntries?
-  result = FetchFromOneSpansSafe(N, start, end);
-  if (result != 0) {
-    while (result < N) {
-      int n;
-      void* head = NULL;
-      void* tail = NULL;
-      n = FetchFromOneSpans(N - result, &head, &tail);
-      if (!n) break;
-      result += n;
-      SLL_PushRange(start, head, tail);
-    }
-  }
-  lock_.Unlock();
-  return result;
-}
-
-
-int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) {
-  int result = FetchFromOneSpans(N, start, end);
-  if (!result) {
-    Populate();
-    result = FetchFromOneSpans(N, start, end);
-  }
-  return result;
-}
-
-int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) {
-  if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0;
-  Span* span = nonempty_.next;
-
-  ASSERT(span->objects != NULL);
-
-  int result = 0;
-  void *prev, *curr;
-  curr = span->objects;
-  do {
-    prev = curr;
-    curr = *(reinterpret_cast<void**>(curr));
-  } while (++result < N && curr != NULL);
-
-  if (curr == NULL) {
-    // Move to empty list
-    tcmalloc::DLL_Remove(span);
-    tcmalloc::DLL_Prepend(&empty_, span);
-    Event(span, 'E', 0);
-  }
-
-  *start = span->objects;
-  *end = prev;
-  span->objects = curr;
-  SLL_SetNext(*end, NULL);
-  span->refcount += result;
-  counter_ -= result;
-  return result;
-}
-
-// Fetch memory from the system and add to the central cache freelist.
-void CentralFreeList::Populate() {
-  // Release central list lock while operating on pageheap
-  lock_.Unlock();
-  const size_t npages = Static::sizemap()->class_to_pages(size_class_);
-
-  Span* span;
-  {
-    SpinLockHolder h(Static::pageheap_lock());
-    span = Static::pageheap()->New(npages);
-    if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
-  }
-  if (span == NULL) {
-    Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: allocation failed", npages << kPageShift);
-    lock_.Lock();
-    return;
-  }
-  ASSERT(span->length == npages);
-  // Cache sizeclass info eagerly.  Locking is not necessary.
-  // (Instead of being eager, we could just replace any stale info
-  // about this span, but that seems to be no better in practice.)
-  for (int i = 0; i < npages; i++) {
-    Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
-  }
-
-  // Split the block into pieces and add to the free-list
-  // TODO: coloring of objects to avoid cache conflicts?
-  void** tail = &span->objects;
-  char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
-  char* limit = ptr + (npages << kPageShift);
-  const size_t size = Static::sizemap()->ByteSizeForClass(size_class_);
-  int num = 0;
-  while (ptr + size <= limit) {
-    *tail = ptr;
-    tail = reinterpret_cast<void**>(ptr);
-    ptr += size;
-    num++;
-  }
-  ASSERT(ptr <= limit);
-  *tail = NULL;
-  span->refcount = 0; // No sub-object in use yet
-
-  // Add span to list of non-empty spans
-  lock_.Lock();
-  tcmalloc::DLL_Prepend(&nonempty_, span);
-  ++num_spans_;
-  counter_ += num;
-}
-
-int CentralFreeList::tc_length() {
-  SpinLockHolder h(&lock_);
-  return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_);
-}
-
-size_t CentralFreeList::OverheadBytes() {
-  SpinLockHolder h(&lock_);
-  if (size_class_ == 0) {  // 0 holds the 0-sized allocations
-    return 0;
-  }
-  const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_);
-  const size_t object_size = Static::sizemap()->class_to_size(size_class_);
-  ASSERT(object_size > 0);
-  const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size;
-  return num_spans_ * overhead_per_span;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/central_freelist.h b/contrib/libtcmalloc/src/central_freelist.h
deleted file mode 100644
index 4148680d20a..00000000000
--- a/contrib/libtcmalloc/src/central_freelist.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#ifndef TCMALLOC_CENTRAL_FREELIST_H_
-#define TCMALLOC_CENTRAL_FREELIST_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for int32_t
-#endif
-#include "base/spinlock.h"
-#include "base/thread_annotations.h"
-#include "common.h"
-#include "span.h"
-
-namespace tcmalloc {
-
-// Data kept per size-class in central cache.
-class CentralFreeList {
- public:
-  // A CentralFreeList may be used before its constructor runs.
-  // So we prevent lock_'s constructor from doing anything to the
-  // lock_ state.
-  CentralFreeList() : lock_(base::LINKER_INITIALIZED) { }
-
-  void Init(size_t cl);
-
-  // These methods all do internal locking.
-
-  // Insert the specified range into the central freelist.  N is the number of
-  // elements in the range.  RemoveRange() is the opposite operation.
-  void InsertRange(void *start, void *end, int N);
-
-  // Returns the actual number of fetched elements and sets *start and *end.
-  int RemoveRange(void **start, void **end, int N);
-
-  // Returns the number of free objects in cache.
-  int length() {
-    SpinLockHolder h(&lock_);
-    return counter_;
-  }
-
-  // Returns the number of free objects in the transfer cache.
-  int tc_length();
-
-  // Returns the memory overhead (internal fragmentation) attributable
-  // to the freelist.  This is memory lost when the size of elements
-  // in a freelist doesn't exactly divide the page-size (an 8192-byte
-  // page full of 5-byte objects would have 2 bytes memory overhead).
-  size_t OverheadBytes();
-
-  // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
-  // to set the lock in a consistent state before the fork.
-  void Lock() {
-    lock_.Lock();
-  }
-
-  void Unlock() {
-    lock_.Unlock();
-  }
-
- private:
-  // TransferCache is used to cache transfers of
-  // sizemap.num_objects_to_move(size_class) back and forth between
-  // thread caches and the central cache for a given size class.
-  struct TCEntry {
-    void *head;  // Head of chain of objects.
-    void *tail;  // Tail of chain of objects.
-  };
-
-  // A central cache freelist can have anywhere from 0 to kMaxNumTransferEntries
-  // slots to put link list chains into.
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // For the small memory model, the transfer cache is not used.
-  static const int kMaxNumTransferEntries = 0;
-#else
-  // Starting point for the the maximum number of entries in the transfer cache.
-  // This actual maximum for a given size class may be lower than this
-  // maximum value.
-  static const int kMaxNumTransferEntries = 64;
-#endif
-
-  // REQUIRES: lock_ is held
-  // Remove object from cache and return.
-  // Return NULL if no free entries in cache.
-  int FetchFromOneSpans(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Remove object from cache and return.  Fetches
-  // from pageheap if cache is empty.  Only returns
-  // NULL on allocation failure.
-  int FetchFromOneSpansSafe(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Release a linked list of objects to spans.
-  // May temporarily release lock_.
-  void ReleaseListToSpans(void *start) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Release an object to spans.
-  // May temporarily release lock_.
-  void ReleaseToSpans(void* object) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ is held
-  // Populate cache by fetching from the page heap.
-  // May temporarily release lock_.
-  void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock is held.
-  // Tries to make room for a TCEntry.  If the cache is full it will try to
-  // expand it at the cost of some other cache size.  Return false if there is
-  // no space.
-  bool MakeCacheSpace() EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
-  // REQUIRES: lock_ for locked_size_class is held.
-  // Picks a "random" size class to steal TCEntry slot from.  In reality it
-  // just iterates over the sizeclasses but does so without taking a lock.
-  // Returns true on success.
-  // May temporarily lock a "random" size class.
-  static bool EvictRandomSizeClass(int locked_size_class, bool force);
-
-  // REQUIRES: lock_ is *not* held.
-  // Tries to shrink the Cache.  If force is true it will relase objects to
-  // spans if it allows it to shrink the cache.  Return false if it failed to
-  // shrink the cache.  Decrements cache_size_ on succeess.
-  // May temporarily take lock_.  If it takes lock_, the locked_size_class
-  // lock is released to keep the thread from holding two size class locks
-  // concurrently which could lead to a deadlock.
-  bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_);
-
-  // This lock protects all the data members.  cached_entries and cache_size_
-  // may be looked at without holding the lock.
-  SpinLock lock_;
-
-  // We keep linked lists of empty and non-empty spans.
-  size_t   size_class_;     // My size class
-  Span     empty_;          // Dummy header for list of empty spans
-  Span     nonempty_;       // Dummy header for list of non-empty spans
-  size_t   num_spans_;      // Number of spans in empty_ plus nonempty_
-  size_t   counter_;        // Number of free objects in cache entry
-
-  // Here we reserve space for TCEntry cache slots.  Space is preallocated
-  // for the largest possible number of entries than any one size class may
-  // accumulate.  Not all size classes are allowed to accumulate
-  // kMaxNumTransferEntries, so there is some wasted space for those size
-  // classes.
-  TCEntry tc_slots_[kMaxNumTransferEntries];
-
-  // Number of currently used cached entries in tc_slots_.  This variable is
-  // updated under a lock but can be read without one.
-  int32_t used_slots_;
-  // The current number of slots for this size class.  This is an
-  // adaptive value that is increased if there is lots of traffic
-  // on a given size class.
-  int32_t cache_size_;
-  // Maximum size of the cache for a given size class.
-  int32_t max_cache_size_;
-};
-
-// Pads each CentralCache object to multiple of 64 bytes.  Since some
-// compilers (such as MSVC) don't like it when the padding is 0, I use
-// template specialization to remove the padding entirely when
-// sizeof(CentralFreeList) is a multiple of 64.
-template<int kFreeListSizeMod64>
-class CentralFreeListPaddedTo : public CentralFreeList {
- private:
-  char pad_[64 - kFreeListSizeMod64];
-};
-
-template<>
-class CentralFreeListPaddedTo<0> : public CentralFreeList {
-};
-
-class CentralFreeListPadded : public CentralFreeListPaddedTo<
-  sizeof(CentralFreeList) % 64> {
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_CENTRAL_FREELIST_H_
diff --git a/contrib/libtcmalloc/src/common.cc b/contrib/libtcmalloc/src/common.cc
deleted file mode 100644
index 313848c37b6..00000000000
--- a/contrib/libtcmalloc/src/common.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include <stdlib.h> // for getenv and strtol
-#include "config.h"
-#include "common.h"
-#include "system-alloc.h"
-#include "base/spinlock.h"
-#include "getenv_safe.h" // TCMallocGetenvSafe
-
-namespace tcmalloc {
-
-// Define the maximum number of object per classe type to transfer between
-// thread and central caches.
-static int32 FLAGS_tcmalloc_transfer_num_objects;
-
-static const int32 kDefaultTransferNumObjecs = 512;
-
-// The init function is provided to explicit initialize the variable value
-// from the env. var to avoid C++ global construction that might defer its
-// initialization after a malloc/new call.
-static inline void InitTCMallocTransferNumObjects()
-{
-  if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
-    const char *envval = TCMallocGetenvSafe("TCMALLOC_TRANSFER_NUM_OBJ");
-    FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
-      strtol(envval, NULL, 10);
-  }
-}
-
-// Note: the following only works for "n"s that fit in 32-bits, but
-// that is fine since we only use it for small sizes.
-static inline int LgFloor(size_t n) {
-  int log = 0;
-  for (int i = 4; i >= 0; --i) {
-    int shift = (1 << i);
-    size_t x = n >> shift;
-    if (x != 0) {
-      n = x;
-      log += shift;
-    }
-  }
-  ASSERT(n == 1);
-  return log;
-}
-
-int AlignmentForSize(size_t size) {
-  int alignment = kAlignment;
-  if (size > kMaxSize) {
-    // Cap alignment at kPageSize for large sizes.
-    alignment = kPageSize;
-  } else if (size >= 128) {
-    // Space wasted due to alignment is at most 1/8, i.e., 12.5%.
-    alignment = (1 << LgFloor(size)) / 8;
-  } else if (size >= kMinAlign) {
-    // We need an alignment of at least 16 bytes to satisfy
-    // requirements for some SSE types.
-    alignment = kMinAlign;
-  }
-  // Maximum alignment allowed is page size alignment.
-  if (alignment > kPageSize) {
-    alignment = kPageSize;
-  }
-  CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign);
-  CHECK_CONDITION((alignment & (alignment - 1)) == 0);
-  return alignment;
-}
-
-int SizeMap::NumMoveSize(size_t size) {
-  if (size == 0) return 0;
-  // Use approx 64k transfers between thread and central caches.
-  int num = static_cast<int>(64.0 * 1024.0 / size);
-  if (num < 2) num = 2;
-
-  // Avoid bringing too many objects into small object free lists.
-  // If this value is too large:
-  // - We waste memory with extra objects sitting in the thread caches.
-  // - The central freelist holds its lock for too long while
-  //   building a linked list of objects, slowing down the allocations
-  //   of other threads.
-  // If this value is too small:
-  // - We go to the central freelist too often and we have to acquire
-  //   its lock each time.
-  // This value strikes a balance between the constraints above.
-  if (num > FLAGS_tcmalloc_transfer_num_objects)
-    num = FLAGS_tcmalloc_transfer_num_objects;
-
-  return num;
-}
-
-// Initialize the mapping arrays
-void SizeMap::Init() {
-  InitTCMallocTransferNumObjects();
-
-  // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
-  if (ClassIndex(0) != 0) {
-    Log(kCrash, __FILE__, __LINE__,
-        "Invalid class index for size 0", ClassIndex(0));
-  }
-  if (ClassIndex(kMaxSize) >= sizeof(class_array_)) {
-    Log(kCrash, __FILE__, __LINE__,
-        "Invalid class index for kMaxSize", ClassIndex(kMaxSize));
-  }
-
-  // Compute the size classes we want to use
-  int sc = 1;   // Next size class to assign
-  int alignment = kAlignment;
-  CHECK_CONDITION(kAlignment <= kMinAlign);
-  for (size_t size = kAlignment; size <= kMaxSize; size += alignment) {
-    alignment = AlignmentForSize(size);
-    CHECK_CONDITION((size % alignment) == 0);
-
-    int blocks_to_move = NumMoveSize(size) / 4;
-    size_t psize = 0;
-    do {
-      psize += kPageSize;
-      // Allocate enough pages so leftover is less than 1/8 of total.
-      // This bounds wasted space to at most 12.5%.
-      while ((psize % size) > (psize >> 3)) {
-        psize += kPageSize;
-      }
-      // Continue to add pages until there are at least as many objects in
-      // the span as are needed when moving objects from the central
-      // freelists and spans to the thread caches.
-    } while ((psize / size) < (blocks_to_move));
-    const size_t my_pages = psize >> kPageShift;
-
-    if (sc > 1 && my_pages == class_to_pages_[sc-1]) {
-      // See if we can merge this into the previous class without
-      // increasing the fragmentation of the previous class.
-      const size_t my_objects = (my_pages << kPageShift) / size;
-      const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift)
-                                  / class_to_size_[sc-1];
-      if (my_objects == prev_objects) {
-        // Adjust last class to include this size
-        class_to_size_[sc-1] = size;
-        continue;
-      }
-    }
-
-    // Add new class
-    class_to_pages_[sc] = my_pages;
-    class_to_size_[sc] = size;
-    sc++;
-  }
-  if (sc != kNumClasses) {
-    Log(kCrash, __FILE__, __LINE__,
-        "wrong number of size classes: (found vs. expected )", sc, kNumClasses);
-  }
-
-  // Initialize the mapping arrays
-  int next_size = 0;
-  for (int c = 1; c < kNumClasses; c++) {
-    const int max_size_in_class = class_to_size_[c];
-    for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
-      class_array_[ClassIndex(s)] = c;
-    }
-    next_size = max_size_in_class + kAlignment;
-  }
-
-  // Double-check sizes just to be safe
-  for (size_t size = 0; size <= kMaxSize;) {
-    const int sc = SizeClass(size);
-    if (sc <= 0 || sc >= kNumClasses) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Bad size class (class, size)", sc, size);
-    }
-    if (sc > 1 && size <= class_to_size_[sc-1]) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Allocating unnecessarily large class (class, size)", sc, size);
-    }
-    const size_t s = class_to_size_[sc];
-    if (size > s || s == 0) {
-      Log(kCrash, __FILE__, __LINE__,
-          "Bad (class, size, requested)", sc, s, size);
-    }
-    if (size <= kMaxSmallSize) {
-      size += 8;
-    } else {
-      size += 128;
-    }
-  }
-
-  // Initialize the num_objects_to_move array.
-  for (size_t cl = 1; cl  < kNumClasses; ++cl) {
-    num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl));
-  }
-}
-
-// Metadata allocator -- keeps stats about how many bytes allocated.
-static uint64_t metadata_system_bytes_ = 0;
-static const size_t kMetadataAllocChunkSize = 8*1024*1024;
-// As ThreadCache objects are allocated with MetaDataAlloc, and also
-// CACHELINE_ALIGNED, we must use the same alignment as TCMalloc_SystemAlloc.
-static const size_t kMetadataAllignment = sizeof(MemoryAligner);
-
-static char *metadata_chunk_alloc_;
-static size_t metadata_chunk_avail_;
-
-static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
-
-void* MetaDataAlloc(size_t bytes) {
-  if (bytes >= kMetadataAllocChunkSize) {
-    void *rv = TCMalloc_SystemAlloc(bytes,
-                                    NULL, kMetadataAllignment);
-    if (rv != NULL) {
-      metadata_system_bytes_ += bytes;
-    }
-    return rv;
-  }
-
-  SpinLockHolder h(&metadata_alloc_lock);
-
-  // the following works by essentially turning address to integer of
-  // log_2 kMetadataAllignment size and negating it. I.e. negated
-  // value + original value gets 0 and that's what we want modulo
-  // kMetadataAllignment. Note, we negate before masking higher bits
-  // off, otherwise we'd have to mask them off after negation anyways.
-  intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
-
-  if (metadata_chunk_avail_ < bytes + alignment) {
-    size_t real_size;
-    void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
-                                     &real_size, kMetadataAllignment);
-    if (ptr == NULL) {
-      return NULL;
-    }
-
-    metadata_chunk_alloc_ = static_cast<char *>(ptr);
-    metadata_chunk_avail_ = real_size;
-
-    alignment = 0;
-  }
-
-  void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
-  bytes += alignment;
-  metadata_chunk_alloc_ += bytes;
-  metadata_chunk_avail_ -= bytes;
-  metadata_system_bytes_ += bytes;
-  return rv;
-}
-
-uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/common.h b/contrib/libtcmalloc/src/common.h
deleted file mode 100644
index e8a1ba6972c..00000000000
--- a/contrib/libtcmalloc/src/common.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Common definitions for tcmalloc code.
-
-#ifndef TCMALLOC_COMMON_H_
-#define TCMALLOC_COMMON_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t, uint64_t
-#endif
-#include "internal_logging.h"  // for ASSERT, etc
-#include "base/basictypes.h"   // for LIKELY, etc
-
-#ifdef HAVE_BUILTIN_EXPECT
-#define LIKELY(x) __builtin_expect(!!(x), 1)
-#define UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
-
-// Type that can hold a page number
-typedef uintptr_t PageID;
-
-// Type that can hold the length of a run of pages
-typedef uintptr_t Length;
-
-//-------------------------------------------------------------------
-// Configuration
-//-------------------------------------------------------------------
-
-#if defined(TCMALLOC_ALIGN_8BYTES)
-// Unless we force to use 8 bytes alignment we use an alignment of
-// at least 16 bytes to statisfy requirements for some SSE types.
-// Keep in mind when using the 16 bytes alignment you can have a space
-// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
-static const size_t kMinAlign   = 8;
-// Number of classes created until reach page size 128.
-static const size_t kBaseClasses = 16;
-#else
-static const size_t kMinAlign   = 16;
-static const size_t kBaseClasses = 9;
-#endif
-
-// Using large pages speeds up the execution at a cost of larger memory use.
-// Deallocation may speed up by a factor as the page map gets 8x smaller, so
-// lookups in the page map result in fewer L2 cache misses, which translates to
-// speedup for application/platform combinations with high L2 cache pressure.
-// As the number of size classes increases with large pages, we increase
-// the thread cache allowance to avoid passing more free ranges to and from
-// central lists.  Also, larger pages are less likely to get freed.
-// These two factors cause a bounded increase in memory use.
-#if defined(TCMALLOC_32K_PAGES)
-static const size_t kPageShift  = 15;
-static const size_t kNumClasses = kBaseClasses + 69;
-#elif defined(TCMALLOC_64K_PAGES)
-static const size_t kPageShift  = 16;
-static const size_t kNumClasses = kBaseClasses + 73;
-#else
-static const size_t kPageShift  = 13;
-static const size_t kNumClasses = kBaseClasses + 79;
-#endif
-
-static const size_t kMaxThreadCacheSize = 4 << 20;
-
-static const size_t kPageSize   = 1 << kPageShift;
-static const size_t kMaxSize    = 256 * 1024;
-static const size_t kAlignment  = 8;
-static const size_t kLargeSizeClass = 0;
-// For all span-lengths < kMaxPages we keep an exact-size list.
-static const size_t kMaxPages = 1 << (20 - kPageShift);
-
-// Default bound on the total amount of thread caches.
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-// Make the overall thread cache no bigger than that of a single thread
-// for the small memory footprint case.
-static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
-#else
-static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
-#endif
-
-// Lower bound on the per-thread cache sizes
-static const size_t kMinThreadCacheSize = kMaxSize * 2;
-
-// The number of bytes one ThreadCache will steal from another when
-// the first ThreadCache is forced to Scavenge(), delaying the
-// next call to Scavenge for this thread.
-static const size_t kStealAmount = 1 << 16;
-
-// The number of times that a deallocation can cause a freelist to
-// go over its max_length() before shrinking max_length().
-static const int kMaxOverages = 3;
-
-// Maximum length we allow a per-thread free-list to have before we
-// move objects from it into the corresponding central free-list.  We
-// want this big to avoid locking the central free-list too often.  It
-// should not hurt to make this list somewhat big because the
-// scavenging code will shrink it down when its contents are not in use.
-static const int kMaxDynamicFreeListLength = 8192;
-
-static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
-
-#if defined __x86_64__
-// All current and planned x86_64 processors only look at the lower 48 bits
-// in virtual to physical address translation.  The top 16 are thus unused.
-// TODO(rus): Under what operating systems can we increase it safely to 17?
-// This lets us use smaller page maps.  On first allocation, a 36-bit page map
-// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
-static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
-#else
-static const int kAddressBits = 8 * sizeof(void*);
-#endif
-
-namespace tcmalloc {
-
-// Convert byte size into pages.  This won't overflow, but may return
-// an unreasonably large value if bytes is huge enough.
-inline Length pages(size_t bytes) {
-  return (bytes >> kPageShift) +
-      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
-}
-
-// For larger allocation sizes, we use larger memory alignments to
-// reduce the number of size classes.
-int AlignmentForSize(size_t size);
-
-// Size-class information + mapping
-class SizeMap {
- private:
-  // Number of objects to move between a per-thread list and a central
-  // list in one shot.  We want this to be not too small so we can
-  // amortize the lock overhead for accessing the central list.  Making
-  // it too big may temporarily cause unnecessary memory wastage in the
-  // per-thread free list until the scavenger cleans up the list.
-  int num_objects_to_move_[kNumClasses];
-
-  //-------------------------------------------------------------------
-  // Mapping from size to size_class and vice versa
-  //-------------------------------------------------------------------
-
-  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
-  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
-  // So for these larger sizes we have an array indexed by ceil(size/128).
-  //
-  // We flatten both logical arrays into one physical array and use
-  // arithmetic to compute an appropriate index.  The constants used by
-  // ClassIndex() were selected to make the flattening work.
-  //
-  // Examples:
-  //   Size       Expression                      Index
-  //   -------------------------------------------------------
-  //   0          (0 + 7) / 8                     0
-  //   1          (1 + 7) / 8                     1
-  //   ...
-  //   1024       (1024 + 7) / 8                  128
-  //   1025       (1025 + 127 + (120<<7)) / 128   129
-  //   ...
-  //   32768      (32768 + 127 + (120<<7)) / 128  376
-  static const int kMaxSmallSize = 1024;
-  static const size_t kClassArraySize =
-      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
-  unsigned char class_array_[kClassArraySize];
-
-  static inline size_t SmallSizeClass(size_t s) {
-    return (static_cast<uint32_t>(s) + 7) >> 3;
-  }
-
-  static inline size_t LargeSizeClass(size_t s) {
-    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
-  }
-
-  // Compute index of the class_array[] entry for a given size
-  static inline size_t ClassIndex(size_t s) {
-    // Use unsigned arithmetic to avoid unnecessary sign extensions.
-    ASSERT(0 <= s);
-    ASSERT(s <= kMaxSize);
-    if (LIKELY(s <= kMaxSmallSize)) {
-      return SmallSizeClass(s);
-    } else {
-      return LargeSizeClass(s);
-    }
-  }
-
-  int NumMoveSize(size_t size);
-
-  // Mapping from size class to max size storable in that class
-  size_t class_to_size_[kNumClasses];
-
-  // Mapping from size class to number of pages to allocate at a time
-  size_t class_to_pages_[kNumClasses];
-
- public:
-  // Constructor should do nothing since we rely on explicit Init()
-  // call, which may or may not be called before the constructor runs.
-  SizeMap() { }
-
-  // Initialize the mapping arrays
-  void Init();
-
-  inline int SizeClass(size_t size) {
-    return class_array_[ClassIndex(size)];
-  }
-
-  inline bool MaybeSizeClass(size_t size, size_t *size_class) {
-    size_t class_idx;
-    if (LIKELY(size <= kMaxSmallSize)) {
-      class_idx = SmallSizeClass(size);
-    } else if (size <= kMaxSize) {
-      class_idx = LargeSizeClass(size);
-    } else {
-      return false;
-    }
-    *size_class = class_array_[class_idx];
-    return true;
-  }
-
-  // Get the byte-size for a specified class
-  inline size_t ByteSizeForClass(size_t cl) {
-    return class_to_size_[cl];
-  }
-
-  // Mapping from size class to max size storable in that class
-  inline size_t class_to_size(size_t cl) {
-    return class_to_size_[cl];
-  }
-
-  // Mapping from size class to number of pages to allocate at a time
-  inline size_t class_to_pages(size_t cl) {
-    return class_to_pages_[cl];
-  }
-
-  // Number of objects to move between a per-thread list and a central
-  // list in one shot.  We want this to be not too small so we can
-  // amortize the lock overhead for accessing the central list.  Making
-  // it too big may temporarily cause unnecessary memory wastage in the
-  // per-thread free list until the scavenger cleans up the list.
-  inline int num_objects_to_move(size_t cl) {
-    return num_objects_to_move_[cl];
-  }
-};
-
-// Allocates "bytes" worth of memory and returns it.  Increments
-// metadata_system_bytes appropriately.  May return NULL if allocation
-// fails.  Requires pageheap_lock is held.
-void* MetaDataAlloc(size_t bytes);
-
-// Returns the total number of bytes allocated from the system.
-// Requires pageheap_lock is held.
-uint64_t metadata_system_bytes();
-
-// size/depth are made the same size as a pointer so that some generic
-// code below can conveniently cast them back and forth to void*.
-static const int kMaxStackDepth = 31;
-struct StackTrace {
-  uintptr_t size;          // Size of object
-  uintptr_t depth;         // Number of PC values stored in array below
-  void*     stack[kMaxStackDepth];
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_COMMON_H_
diff --git a/contrib/libtcmalloc/src/config.h b/contrib/libtcmalloc/src/config.h
deleted file mode 100644
index 9f9a7a259e5..00000000000
--- a/contrib/libtcmalloc/src/config.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/* src/config.h.  Generated from config.h.in by configure.  */
-/* src/config.h.in.  Generated from configure.ac by autoheader.  */
-
-
-#ifndef GPERFTOOLS_CONFIG_H_
-#define GPERFTOOLS_CONFIG_H_
-
-
-/* Build runtime detection for sized delete */
-/* #undef ENABLE_DYNAMIC_SIZED_DELETE */
-
-/* Build sized deletion operators */
-/* #undef ENABLE_SIZED_DELETE */
-
-/* Define to 1 if compiler supports __builtin_expect */
-#if _MSC_VER
-#define HAVE_BUILTIN_EXPECT 0
-#else
-#define HAVE_BUILTIN_EXPECT 1
-#endif
-
-/* Define to 1 if compiler supports __builtin_stack_pointer */
-/* #undef HAVE_BUILTIN_STACK_POINTER */
-
-/* Define to 1 if you have the <conflict-signal.h> header file. */
-/* #undef HAVE_CONFLICT_SIGNAL_H */
-
-/* Define to 1 if you have the <cygwin/signal.h> header file. */
-/* #undef HAVE_CYGWIN_SIGNAL_H */
-
-/* Define to 1 if you have the declaration of `backtrace', and to 0 if you
-   don't. */
-/* #undef HAVE_DECL_BACKTRACE */
-
-/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't.
-   */
-#define HAVE_DECL_CFREE 1
-
-/* Define to 1 if you have the declaration of `memalign', and to 0 if you
-   don't. */
-#define HAVE_DECL_MEMALIGN 1
-
-/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you
-   don't. */
-/* #undef HAVE_DECL_NANOSLEEP */
-
-/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if
-   you don't. */
-#define HAVE_DECL_POSIX_MEMALIGN 1
-
-/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you
-   don't. */
-#define HAVE_DECL_PVALLOC 1
-
-/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't.
-   */
-/* #undef HAVE_DECL_SLEEP */
-
-/* Define to 1 if you have the declaration of `uname', and to 0 if you don't.
-   */
-#define HAVE_DECL_UNAME 1
-
-/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't.
-   */
-#define HAVE_DECL_VALLOC 1
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#define HAVE_DLFCN_H 1
-
-/* Define to 1 if the system has the type `Elf32_Versym'. */
-#define HAVE_ELF32_VERSYM 1
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-#define HAVE_EXECINFO_H 1
-
-/* Define to 1 if you have the <fcntl.h> header file. */
-#define HAVE_FCNTL_H 1
-
-/* Define to 1 if you have the <features.h> header file. */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#define HAVE_FEATURES_H 1
-#endif
-
-/* Define to 1 if you have the `fork' function. */
-#define HAVE_FORK 1
-
-/* Define to 1 if you have the `geteuid' function. */
-#define HAVE_GETEUID 1
-
-/* Define to 1 if you have the `getpagesize' function. */
-#define HAVE_GETPAGESIZE 1
-
-/* Define to 1 if you have the <glob.h> header file. */
-#define HAVE_GLOB_H 1
-
-/* Define to 1 if you have the <grp.h> header file. */
-#define HAVE_GRP_H 1
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the <libunwind.h> header file. */
-//#define HAVE_LIBUNWIND_H 1
-
-/* Define to 1 if you have the <linux/ptrace.h> header file. */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#define HAVE_LINUX_PTRACE_H 1
-#endif
-
-/* Define if this is Linux that has SIGEV_THREAD_ID */
-#define HAVE_LINUX_SIGEV_THREAD_ID 1
-
-/* Define to 1 if you have the <malloc.h> header file. */
-#if !defined(__FreeBSD__)
-#define HAVE_MALLOC_H 1
-#endif
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* Define to 1 if you have a working `mmap' system call. */
-#define HAVE_MMAP 1
-
-/* define if the compiler implements namespaces */
-#define HAVE_NAMESPACES 1
-
-/* Define to 1 if you have the <poll.h> header file. */
-#define HAVE_POLL_H 1
-
-/* define if libc has program_invocation_name */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#define HAVE_PROGRAM_INVOCATION_NAME 1
-#endif
-
-/* Define if you have POSIX threads libraries and header files. */
-#define HAVE_PTHREAD 1
-
-/* defined to 1 if pthread symbols are exposed even without include pthread.h
-   */
-/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */
-
-/* Define to 1 if you have the <pwd.h> header file. */
-#define HAVE_PWD_H 1
-
-/* Define to 1 if you have the `sbrk' function. */
-#define HAVE_SBRK 1
-
-/* Define to 1 if you have the <sched.h> header file. */
-#define HAVE_SCHED_H 1
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if the system has the type `struct mallinfo'. */
-//#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#if !defined(__APPLE__)
-#define HAVE_STRUCT_MALLINFO 1
-#endif
-
-/* Define to 1 if you have the <sys/cdefs.h> header file. */
-#define HAVE_SYS_CDEFS_H 1
-
-/* Define to 1 if you have the <sys/param.h> header file. */
-#define HAVE_SYS_PARAM_H 1
-
-/* Define to 1 if you have the <sys/prctl.h> header file. */
-#define HAVE_SYS_PRCTL_H 1
-
-/* Define to 1 if you have the <sys/resource.h> header file. */
-#define HAVE_SYS_RESOURCE_H 1
-
-/* Define to 1 if you have the <sys/socket.h> header file. */
-#define HAVE_SYS_SOCKET_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/syscall.h> header file. */
-#define HAVE_SYS_SYSCALL_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <sys/ucontext.h> header file. */
-/* #undef HAVE_SYS_UCONTEXT_H */
-
-/* Define to 1 if you have the <sys/wait.h> header file. */
-#define HAVE_SYS_WAIT_H 1
-
-/* Define to 1 if compiler supports __thread */
-#define HAVE_TLS 1
-
-/* Define to 1 if you have the <ucontext.h> header file. */
-/* #undef HAVE_UCONTEXT_H */
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Whether <unwind.h> contains _Unwind_Backtrace */
-#define HAVE_UNWIND_BACKTRACE 1
-
-/* Define to 1 if you have the <unwind.h> header file. */
-#define HAVE_UNWIND_H 1
-
-/* Define to 1 if you have the <valgrind.h> header file. */
-/* #undef HAVE_VALGRIND_H */
-
-/* define if your compiler has __attribute__ */
-#define HAVE___ATTRIBUTE__ 1
-
-/* Define to 1 if compiler supports __environ */
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
-#define HAVE___ENVIRON 1
-#endif
-
-/* Define to 1 if the system has the type `__int64'. */
-/* #undef HAVE___INT64 */
-
-/* prefix where we look for installed files */
-#define INSTALL_PREFIX "/usr/local"
-
-/* Define to 1 if int32_t is equivalent to intptr_t */
-/* #undef INT32_EQUALS_INTPTR */
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
-   */
-#define LT_OBJDIR ".libs/"
-
-/* Name of package */
-#define PACKAGE "gperftools"
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT "gperftools@googlegroups.com"
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "gperftools"
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "gperftools 2.5"
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "gperftools"
-
-/* Define to the home page for this package. */
-#define PACKAGE_URL ""
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION "2.5"
-
-/* How to access the PC from a struct ucontext */
-/* #undef PC_FROM_UCONTEXT */
-
-/* Always the empty-string on non-windows systems. On windows, should be
-   "__declspec(dllexport)". This way, when we compile the dll, we export our
-   functions/classes. It's safe to define this here because config.h is only
-   used internally, to compile the DLL, and every DLL source file #includes
-   "config.h" before anything else. */
-#define PERFTOOLS_DLL_DECL /**/
-
-/* printf format code for printing a size_t and ssize_t */
-#define PRIdS "ld"
-
-/* printf format code for printing a size_t and ssize_t */
-#define PRIuS "lu"
-
-/* printf format code for printing a size_t and ssize_t */
-#define PRIxS "lx"
-
-/* Mark the systems where we know it's bad if pthreads runs too
-   early before main (before threads are initialized, presumably).  */
-#ifdef __FreeBSD__
-#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1
-#endif
-
-/* Define to necessary symbol if this constant uses a non-standard name on
-   your system. */
-/* #undef PTHREAD_CREATE_JOINABLE */
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
-/* the namespace where STL code like vector<> is defined */
-#define STL_NAMESPACE std
-
-/* Define 32K of internal pages size for tcmalloc */
-/* #undef TCMALLOC_32K_PAGES */
-
-/* Define 64K of internal pages size for tcmalloc */
-/* #undef TCMALLOC_64K_PAGES */
-
-/* Define 8 bytes of allocation alignment for tcmalloc */
-/* #undef TCMALLOC_ALIGN_8BYTES */
-
-/* Version number of package */
-#define VERSION "2.5"
-
-/* C99 says: define this to get the PRI... macros from stdint.h */
-#ifndef __STDC_FORMAT_MACROS
-# define __STDC_FORMAT_MACROS 1
-#endif
-
-/* Define to `__inline__' or `__inline' if that's what the C compiler
-   calls it, or to nothing if 'inline' is not supported under any name.  */
-#ifndef __cplusplus
-/* #undef inline */
-#endif
-
-
-#ifdef __MINGW32__
-#include "windows/mingw.h"
-#endif
-
-#endif  /* #ifndef GPERFTOOLS_CONFIG_H_ */
-
diff --git a/contrib/libtcmalloc/src/debugallocation.cc b/contrib/libtcmalloc/src/debugallocation.cc
deleted file mode 100644
index 178809bc8a3..00000000000
--- a/contrib/libtcmalloc/src/debugallocation.cc
+++ /dev/null
@@ -1,1500 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2000, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Urs Holzle <opensource@google.com>
-
-#include "config.h"
-#include <errno.h>
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-// We only need malloc.h for struct mallinfo.
-#ifdef HAVE_STRUCT_MALLINFO
-// Malloc can be in several places on older versions of OS X.
-# if defined(HAVE_MALLOC_H)
-# include <malloc.h>
-# elif defined(HAVE_MALLOC_MALLOC_H)
-# include <malloc/malloc.h>
-# elif defined(HAVE_SYS_MALLOC_H)
-# include <sys/malloc.h>
-# endif
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#include <sys/stat.h>
-#include <sys/types.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include <gperftools/malloc_extension.h>
-#include <gperftools/malloc_hook.h>
-#include <gperftools/stacktrace.h>
-#include "addressmap-inl.h"
-#include "base/commandlineflags.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include "base/spinlock.h"
-#include "malloc_hook-inl.h"
-#include "symbolize.h"
-
-// NOTE: due to #define below, tcmalloc.cc will omit tc_XXX
-// definitions. So that debug implementations can be defined
-// instead. We're going to use do_malloc, do_free and other do_XXX
-// functions that are defined in tcmalloc.cc for actual memory
-// management
-#define TCMALLOC_USING_DEBUGALLOCATION
-#include "tcmalloc.cc"
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
-// form of the name instead.
-#ifndef MAP_ANONYMOUS
-# define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#pragma GCC diagnostic push
-#ifdef __clang__
-#pragma GCC diagnostic ignored "-Wunused-private-field"
-#pragma GCC diagnostic ignored "-Wgnu-alignof-expression"
-#endif
-
-// ========================================================================= //
-
-DEFINE_bool(malloctrace,
-            EnvToBool("TCMALLOC_TRACE", false),
-            "Enables memory (de)allocation tracing to /tmp/google.alloc.");
-#ifdef HAVE_MMAP
-DEFINE_bool(malloc_page_fence,
-            EnvToBool("TCMALLOC_PAGE_FENCE", false),
-            "Enables putting of memory allocations at page boundaries "
-            "with a guard page following the allocation (to catch buffer "
-            "overruns right when they happen).");
-DEFINE_bool(malloc_page_fence_never_reclaim,
-            EnvToBool("TCMALLOC_PAGE_FRANCE_NEVER_RECLAIM", false),
-            "Enables making the virtual address space inaccessible "
-            "upon a deallocation instead of returning it and reusing later.");
-#else
-DEFINE_bool(malloc_page_fence, false, "Not usable (requires mmap)");
-DEFINE_bool(malloc_page_fence_never_reclaim, false, "Not usable (required mmap)");
-#endif
-DEFINE_bool(malloc_reclaim_memory,
-            EnvToBool("TCMALLOC_RECLAIM_MEMORY", true),
-            "If set to false, we never return memory to malloc "
-            "when an object is deallocated. This ensures that all "
-            "heap object addresses are unique.");
-DEFINE_int32(max_free_queue_size,
-             EnvToInt("TCMALLOC_MAX_FREE_QUEUE_SIZE", 10*1024*1024),
-             "If greater than 0, keep freed blocks in a queue instead of "
-             "releasing them to the allocator immediately.  Release them when "
-             "the total size of all blocks in the queue would otherwise exceed "
-             "this limit.");
-
-DEFINE_bool(symbolize_stacktrace,
-            EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true),
-            "Symbolize the stack trace when provided (on some error exits)");
-
-// If we are LD_PRELOAD-ed against a non-pthreads app, then
-// pthread_once won't be defined.  We declare it here, for that
-// case (with weak linkage) which will cause the non-definition to
-// resolve to NULL.  We can then check for NULL or not in Instance.
-extern "C" int pthread_once(pthread_once_t *, void (*)(void))
-    ATTRIBUTE_WEAK;
-
-// ========================================================================= //
-
-// A safe version of printf() that does not do any allocation and
-// uses very little stack space.
-static void TracePrintf(int fd, const char *fmt, ...)
-  __attribute__ ((__format__ (__printf__, 2, 3)));
-
-// Round "value" up to next "alignment" boundary.
-// Requires that "alignment" be a power of two.
-static intptr_t RoundUp(intptr_t value, intptr_t alignment) {
-  return (value + alignment - 1) & ~(alignment - 1);
-}
-
-// ========================================================================= //
-
-class MallocBlock;
-
-// A circular buffer to hold freed blocks of memory.  MallocBlock::Deallocate
-// (below) pushes blocks into this queue instead of returning them to the
-// underlying allocator immediately.  See MallocBlock::Deallocate for more
-// information.
-//
-// We can't use an STL class for this because we need to be careful not to
-// perform any heap de-allocations in any of the code in this class, since the
-// code in MallocBlock::Deallocate is not re-entrant.
-template <typename QueueEntry>
-class FreeQueue {
- public:
-  FreeQueue() : q_front_(0), q_back_(0) {}
-
-  bool Full() {
-    return (q_front_ + 1) % kFreeQueueSize == q_back_;
-  }
-
-  void Push(const QueueEntry& block) {
-    q_[q_front_] = block;
-    q_front_ = (q_front_ + 1) % kFreeQueueSize;
-  }
-
-  QueueEntry Pop() {
-    RAW_CHECK(q_back_ != q_front_, "Queue is empty");
-    const QueueEntry& ret = q_[q_back_];
-    q_back_ = (q_back_ + 1) % kFreeQueueSize;
-    return ret;
-  }
-
-  size_t size() const {
-    return (q_front_ - q_back_ + kFreeQueueSize) % kFreeQueueSize;
-  }
-
- private:
-  // Maximum number of blocks kept in the free queue before being freed.
-  static const int kFreeQueueSize = 1024;
-
-  QueueEntry q_[kFreeQueueSize];
-  int q_front_;
-  int q_back_;
-};
-
-struct MallocBlockQueueEntry {
-  MallocBlockQueueEntry() : block(NULL), size(0),
-                            num_deleter_pcs(0), deleter_threadid(0) {}
-  MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) {
-    if (FLAGS_max_free_queue_size != 0 && b != NULL) {
-      // Adjust the number of frames to skip (4) if you change the
-      // location of this call.
-      num_deleter_pcs =
-          GetStackTrace(deleter_pcs,
-                        sizeof(deleter_pcs) / sizeof(deleter_pcs[0]),
-                        4);
-      deleter_threadid = pthread_self();
-    } else {
-      num_deleter_pcs = 0;
-      // Zero is an illegal pthread id by my reading of the pthread
-      // implementation:
-      deleter_threadid = 0;
-    }
-  }
-
-  MallocBlock* block;
-  size_t size;
-
-  // When deleted and put in the free queue, we (flag-controlled)
-  // record the stack so that if corruption is later found, we can
-  // print the deleter's stack.  (These three vars add 144 bytes of
-  // overhead under the LP64 data model.)
-  void* deleter_pcs[16];
-  int num_deleter_pcs;
-  pthread_t deleter_threadid;
-};
-
-class MallocBlock {
- public:  // allocation type constants
-
-  // Different allocation types we distinguish.
-  // Note: The lower 4 bits are not random: we index kAllocName array
-  // by these values masked with kAllocTypeMask;
-  // the rest are "random" magic bits to help catch memory corruption.
-  static const int kMallocType = 0xEFCDAB90;
-  static const int kNewType = 0xFEBADC81;
-  static const int kArrayNewType = 0xBCEADF72;
-
- private:  // constants
-
-  // A mask used on alloc types above to get to 0, 1, 2
-  static const int kAllocTypeMask = 0x3;
-  // An additional bit to set in AllocType constants
-  // to mark now deallocated regions.
-  static const int kDeallocatedTypeBit = 0x4;
-
-  // For better memory debugging, we initialize all storage to known
-  // values, and overwrite the storage when it's deallocated:
-  // Byte that fills uninitialized storage.
-  static const int kMagicUninitializedByte = 0xAB;
-  // Byte that fills deallocated storage.
-  // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte
-  //       to work around a bug in the pthread library.
-  static const int kMagicDeletedByte = 0xCD;
-  // A size_t (type of alloc_type_ below) in a deallocated storage
-  // filled with kMagicDeletedByte.
-  static const size_t kMagicDeletedSizeT =
-      0xCDCDCDCD | (((size_t)0xCDCDCDCD << 16) << 16);
-    // Initializer works for 32 and 64 bit size_ts;
-    // "<< 16 << 16" is to fool gcc from issuing a warning
-    // when size_ts are 32 bits.
-
-  // NOTE: on Linux, you can enable malloc debugging support in libc by
-  // setting the environment variable MALLOC_CHECK_ to 1 before you
-  // start the program (see man malloc).
-
-  // We use either do_malloc or mmap to make the actual allocation. In
-  // order to remember which one of the two was used for any block, we store an
-  // appropriate magic word next to the block.
-  static const size_t kMagicMalloc = 0xDEADBEEF;
-  static const size_t kMagicMMap = 0xABCDEFAB;
-
-  // This array will be filled with 0xCD, for use with memcmp.
-  static unsigned char kMagicDeletedBuffer[1024];
-  static pthread_once_t deleted_buffer_initialized_;
-  static bool deleted_buffer_initialized_no_pthreads_;
-
- private:  // data layout
-
-                    // The four fields size1_,offset_,magic1_,alloc_type_
-                    // should together occupy a multiple of 16 bytes. (At the
-                    // moment, sizeof(size_t) == 4 or 8 depending on piii vs
-                    // k8, and 4 of those sum to 16 or 32 bytes).
-                    // This, combined with do_malloc's alignment guarantees,
-                    // ensures that SSE types can be stored into the returned
-                    // block, at &size2_.
-  size_t size1_;
-  size_t offset_;   // normally 0 unless memaligned memory
-                    // see comments in memalign() and FromRawPointer().
-  size_t magic1_;
-  size_t alloc_type_;
-  // here comes the actual data (variable length)
-  // ...
-  // then come the size2_ and magic2_, or a full page of mprotect-ed memory
-  // if the malloc_page_fence feature is enabled.
-  size_t size2_;
-  size_t magic2_;
-
- private:  // static data and helpers
-
-  // Allocation map: stores the allocation type for each allocated object,
-  // or the type or'ed with kDeallocatedTypeBit
-  // for each formerly allocated object.
-  typedef AddressMap<int> AllocMap;
-  static AllocMap* alloc_map_;
-  // This protects alloc_map_ and consistent state of metadata
-  // for each still-allocated object in it.
-  // We use spin locks instead of pthread_mutex_t locks
-  // to prevent crashes via calls to pthread_mutex_(un)lock
-  // for the (de)allocations coming from pthreads initialization itself.
-  static SpinLock alloc_map_lock_;
-
-  // A queue of freed blocks.  Instead of releasing blocks to the allocator
-  // immediately, we put them in a queue, freeing them only when necessary
-  // to keep the total size of all the freed blocks below the limit set by
-  // FLAGS_max_free_queue_size.
-  static FreeQueue<MallocBlockQueueEntry>* free_queue_;
-
-  static size_t free_queue_size_;  // total size of blocks in free_queue_
-  // protects free_queue_ and free_queue_size_
-  static SpinLock free_queue_lock_;
-
-  // Names of allocation types (kMallocType, kNewType, kArrayNewType)
-  static const char* const kAllocName[];
-  // Names of corresponding deallocation types
-  static const char* const kDeallocName[];
-
-  static const char* AllocName(int type) {
-    return kAllocName[type & kAllocTypeMask];
-  }
-
-  static const char* DeallocName(int type) {
-    return kDeallocName[type & kAllocTypeMask];
-  }
-
- private:  // helper accessors
-
-  bool IsMMapped() const { return kMagicMMap == magic1_; }
-
-  bool IsValidMagicValue(size_t value) const {
-    return kMagicMMap == value  ||  kMagicMalloc == value;
-  }
-
-  static size_t real_malloced_size(size_t size) {
-    return size + sizeof(MallocBlock);
-  }
-
-  /*
-   * Here we assume size of page is kMinAlign aligned,
-   * so if size is MALLOC_ALIGNMENT aligned too, then we could
-   * guarantee return address is also kMinAlign aligned, because
-   * mmap return address at nearby page boundary on Linux.
-   */
-  static size_t real_mmapped_size(size_t size) {
-    size_t tmp = size + MallocBlock::data_offset();
-    tmp = RoundUp(tmp, kMinAlign);
-    return tmp;
-  }
-
-  size_t real_size() {
-    return IsMMapped() ? real_mmapped_size(size1_) : real_malloced_size(size1_);
-  }
-
-  // NOTE: if the block is mmapped (that is, we're using the
-  // malloc_page_fence option) then there's no size2 or magic2
-  // (instead, the guard page begins where size2 would be).
-
-  size_t* size2_addr() { return (size_t*)((char*)&size2_ + size1_); }
-  const size_t* size2_addr() const {
-    return (const size_t*)((char*)&size2_ + size1_);
-  }
-
-  size_t* magic2_addr() { return (size_t*)(size2_addr() + 1); }
-  const size_t* magic2_addr() const { return (const size_t*)(size2_addr() + 1); }
-
- private:  // other helpers
-
-  void Initialize(size_t size, int type) {
-    RAW_CHECK(IsValidMagicValue(magic1_), "");
-    // record us as allocated in the map
-    alloc_map_lock_.Lock();
-    if (!alloc_map_) {
-      void* p = do_malloc(sizeof(AllocMap));
-      alloc_map_ = new(p) AllocMap(do_malloc, do_free);
-    }
-    alloc_map_->Insert(data_addr(), type);
-    // initialize us
-    size1_ = size;
-    offset_ = 0;
-    alloc_type_ = type;
-    if (!IsMMapped()) {
-      bit_store(magic2_addr(), &magic1_);
-      bit_store(size2_addr(), &size);
-    }
-    alloc_map_lock_.Unlock();
-    memset(data_addr(), kMagicUninitializedByte, size);
-    if (!IsMMapped()) {
-      RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold");
-      RAW_CHECK(memcmp(&magic1_, magic2_addr(), sizeof(magic1_)) == 0, "should hold");
-    }
-  }
-
-  size_t CheckAndClear(int type, size_t given_size) {
-    alloc_map_lock_.Lock();
-    CheckLocked(type);
-    if (!IsMMapped()) {
-      RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold");
-    }
-    // record us as deallocated in the map
-    alloc_map_->Insert(data_addr(), type | kDeallocatedTypeBit);
-    alloc_map_lock_.Unlock();
-    // clear us
-    const size_t size = real_size();
-    RAW_CHECK(!given_size || given_size == size1_,
-              "right size must be passed to sized delete");
-    memset(this, kMagicDeletedByte, size);
-    return size;
-  }
-
-  void CheckLocked(int type) const {
-    int map_type = 0;
-    const int* found_type =
-      alloc_map_ != NULL ? alloc_map_->Find(data_addr()) : NULL;
-    if (found_type == NULL) {
-      RAW_LOG(FATAL, "memory allocation bug: object at %p "
-                     "has never been allocated", data_addr());
-    } else {
-      map_type = *found_type;
-    }
-    if ((map_type & kDeallocatedTypeBit) != 0) {
-      RAW_LOG(FATAL, "memory allocation bug: object at %p "
-                     "has been already deallocated (it was allocated with %s)",
-                     data_addr(), AllocName(map_type & ~kDeallocatedTypeBit));
-    }
-    if (alloc_type_ == kMagicDeletedSizeT) {
-      RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
-                     "has been corrupted; or else the object has been already "
-                     "deallocated and our memory map has been corrupted",
-                     data_addr());
-    }
-    if (!IsValidMagicValue(magic1_)) {
-      RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
-                     "has been corrupted; "
-                     "or else our memory map has been corrupted and this is a "
-                     "deallocation for not (currently) heap-allocated object",
-                     data_addr());
-    }
-    if (!IsMMapped()) {
-      if (memcmp(&size1_, size2_addr(), sizeof(size1_))) {
-        RAW_LOG(FATAL, "memory stomping bug: a word after object at %p "
-                       "has been corrupted", data_addr());
-      }
-      size_t addr;
-      bit_store(&addr, magic2_addr());
-      if (!IsValidMagicValue(addr)) {
-        RAW_LOG(FATAL, "memory stomping bug: a word after object at %p "
-                "has been corrupted", data_addr());
-      }
-    }
-    if (alloc_type_ != type) {
-      if ((alloc_type_ != MallocBlock::kMallocType) &&
-          (alloc_type_ != MallocBlock::kNewType)    &&
-          (alloc_type_ != MallocBlock::kArrayNewType)) {
-        RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
-                       "has been corrupted", data_addr());
-      }
-      RAW_LOG(FATAL, "memory allocation/deallocation mismatch at %p: "
-                     "allocated with %s being deallocated with %s",
-                     data_addr(), AllocName(alloc_type_), DeallocName(type));
-    }
-    if (alloc_type_ != map_type) {
-      RAW_LOG(FATAL, "memory stomping bug: our memory map has been corrupted : "
-                     "allocation at %p made with %s "
-                     "is recorded in the map to be made with %s",
-                     data_addr(), AllocName(alloc_type_),  AllocName(map_type));
-    }
-  }
-
- public:  // public accessors
-
-  void* data_addr() { return (void*)&size2_; }
-  const void* data_addr() const { return (const void*)&size2_; }
-
-  static size_t data_offset() { return OFFSETOF_MEMBER(MallocBlock, size2_); }
-
-  size_t data_size() const { return size1_; }
-
-  void set_offset(int offset) { this->offset_ = offset; }
-
- public:  // our main interface
-
-  static MallocBlock* Allocate(size_t size, int type) {
-    // Prevent an integer overflow / crash with large allocation sizes.
-    // TODO - Note that for a e.g. 64-bit size_t, max_size_t may not actually
-    // be the maximum value, depending on how the compiler treats ~0. The worst
-    // practical effect is that allocations are limited to 4Gb or so, even if
-    // the address space could take more.
-    static size_t max_size_t = ~0;
-    if (size > max_size_t - sizeof(MallocBlock)) {
-      RAW_LOG(ERROR, "Massive size passed to malloc: %" PRIuS "", size);
-      return NULL;
-    }
-    MallocBlock* b = NULL;
-    const bool use_malloc_page_fence = FLAGS_malloc_page_fence;
-#ifdef HAVE_MMAP
-    if (use_malloc_page_fence) {
-      // Put the block towards the end of the page and make the next page
-      // inaccessible. This will catch buffer overrun right when it happens.
-      size_t sz = real_mmapped_size(size);
-      int pagesize = getpagesize();
-      int num_pages = (sz + pagesize - 1) / pagesize + 1;
-      char* p = (char*) mmap(NULL, num_pages * pagesize, PROT_READ|PROT_WRITE,
-                             MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-      if (p == MAP_FAILED) {
-        // If the allocation fails, abort rather than returning NULL to
-        // malloc. This is because in most cases, the program will run out
-        // of memory in this mode due to tremendous amount of wastage. There
-        // is no point in propagating the error elsewhere.
-        RAW_LOG(FATAL, "Out of memory: possibly due to page fence overhead: %s",
-                strerror(errno));
-      }
-      // Mark the page after the block inaccessible
-      if (mprotect(p + (num_pages - 1) * pagesize, pagesize, PROT_NONE)) {
-        RAW_LOG(FATAL, "Guard page setup failed: %s", strerror(errno));
-      }
-      b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz);
-    } else {
-      b = (MallocBlock*) do_malloc(real_malloced_size(size));
-    }
-#else
-    b = (MallocBlock*) do_malloc(real_malloced_size(size));
-#endif
-
-    // It would be nice to output a diagnostic on allocation failure
-    // here, but logging (other than FATAL) requires allocating
-    // memory, which could trigger a nasty recursion. Instead, preserve
-    // malloc semantics and return NULL on failure.
-    if (b != NULL) {
-      b->magic1_ = use_malloc_page_fence ? kMagicMMap : kMagicMalloc;
-      b->Initialize(size, type);
-    }
-    return b;
-  }
-
-  void Deallocate(int type, size_t given_size) {
-    if (IsMMapped()) {  // have to do this before CheckAndClear
-#ifdef HAVE_MMAP
-      int size = CheckAndClear(type, given_size);
-      int pagesize = getpagesize();
-      int num_pages = (size + pagesize - 1) / pagesize + 1;
-      char* p = (char*) this;
-      if (FLAGS_malloc_page_fence_never_reclaim  ||
-          !FLAGS_malloc_reclaim_memory) {
-        mprotect(p - (num_pages - 1) * pagesize + size,
-                 num_pages * pagesize, PROT_NONE);
-      } else {
-        munmap(p - (num_pages - 1) * pagesize + size, num_pages * pagesize);
-      }
-#endif
-    } else {
-      const size_t size = CheckAndClear(type, given_size);
-      if (FLAGS_malloc_reclaim_memory) {
-        // Instead of freeing the block immediately, push it onto a queue of
-        // recently freed blocks.  Free only enough blocks to keep from
-        // exceeding the capacity of the queue or causing the total amount of
-        // un-released memory in the queue from exceeding
-        // FLAGS_max_free_queue_size.
-        ProcessFreeQueue(this, size, FLAGS_max_free_queue_size);
-      }
-    }
-  }
-
-  static size_t FreeQueueSize() {
-    SpinLockHolder l(&free_queue_lock_);
-    return free_queue_size_;
-  }
-
-  static void ProcessFreeQueue(MallocBlock* b, size_t size,
-                               int max_free_queue_size) {
-    // MallocBlockQueueEntry are about 144 in size, so we can only
-    // use a small array of them on the stack.
-    MallocBlockQueueEntry entries[4];
-    int num_entries = 0;
-    MallocBlockQueueEntry new_entry(b, size);
-    free_queue_lock_.Lock();
-    if (free_queue_ == NULL)
-      free_queue_ = new FreeQueue<MallocBlockQueueEntry>;
-    RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!");
-
-    if (b != NULL) {
-      free_queue_size_ += size + sizeof(MallocBlockQueueEntry);
-      free_queue_->Push(new_entry);
-    }
-
-    // Free blocks until the total size of unfreed blocks no longer exceeds
-    // max_free_queue_size, and the free queue has at least one free
-    // space in it.
-    while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) {
-      RAW_CHECK(num_entries < arraysize(entries), "entries array overflow");
-      entries[num_entries] = free_queue_->Pop();
-      free_queue_size_ -=
-          entries[num_entries].size + sizeof(MallocBlockQueueEntry);
-      num_entries++;
-      if (num_entries == arraysize(entries)) {
-        // The queue will not be full at this point, so it is ok to
-        // release the lock.  The queue may still contain more than
-        // max_free_queue_size, but this is not a strict invariant.
-        free_queue_lock_.Unlock();
-        for (int i = 0; i < num_entries; i++) {
-          CheckForDanglingWrites(entries[i]);
-          do_free(entries[i].block);
-        }
-        num_entries = 0;
-        free_queue_lock_.Lock();
-      }
-    }
-    RAW_CHECK(free_queue_size_ >= 0, "Free queue size went negative!");
-    free_queue_lock_.Unlock();
-    for (int i = 0; i < num_entries; i++) {
-      CheckForDanglingWrites(entries[i]);
-      do_free(entries[i].block);
-    }
-  }
-
-  static void InitDeletedBuffer() {
-    memset(kMagicDeletedBuffer, kMagicDeletedByte, sizeof(kMagicDeletedBuffer));
-    deleted_buffer_initialized_no_pthreads_ = true;
-  }
-
-  static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) {
-    // Initialize the buffer if necessary.
-    if (pthread_once)
-      pthread_once(&deleted_buffer_initialized_, &InitDeletedBuffer);
-    if (!deleted_buffer_initialized_no_pthreads_) {
-      // This will be the case on systems that don't link in pthreads,
-      // including on FreeBSD where pthread_once has a non-zero address
-      // (but doesn't do anything) even when pthreads isn't linked in.
-      InitDeletedBuffer();
-    }
-
-    const unsigned char* p =
-        reinterpret_cast<unsigned char*>(queue_entry.block);
-
-    static const size_t size_of_buffer = sizeof(kMagicDeletedBuffer);
-    const size_t size = queue_entry.size;
-    const size_t buffers = size / size_of_buffer;
-    const size_t remainder = size % size_of_buffer;
-    size_t buffer_idx;
-    for (buffer_idx = 0; buffer_idx < buffers; ++buffer_idx) {
-      CheckForCorruptedBuffer(queue_entry, buffer_idx, p, size_of_buffer);
-      p += size_of_buffer;
-    }
-    CheckForCorruptedBuffer(queue_entry, buffer_idx, p, remainder);
-  }
-
-  static void CheckForCorruptedBuffer(const MallocBlockQueueEntry& queue_entry,
-                                      size_t buffer_idx,
-                                      const unsigned char* buffer,
-                                      size_t size_of_buffer) {
-    if (memcmp(buffer, kMagicDeletedBuffer, size_of_buffer) == 0) {
-      return;
-    }
-
-    RAW_LOG(ERROR,
-            "Found a corrupted memory buffer in MallocBlock (may be offset "
-            "from user ptr): buffer index: %zd, buffer ptr: %p, size of "
-            "buffer: %zd", buffer_idx, buffer, size_of_buffer);
-
-    // The magic deleted buffer should only be 1024 bytes, but in case
-    // this changes, let's put an upper limit on the number of debug
-    // lines we'll output:
-    if (size_of_buffer <= 1024) {
-      for (int i = 0; i < size_of_buffer; ++i) {
-        if (buffer[i] != kMagicDeletedByte) {
-          RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0x%02x).",
-                  i, buffer[i], kMagicDeletedByte);
-        }
-      }
-    } else {
-      RAW_LOG(ERROR, "Buffer too large to print corruption.");
-    }
-
-    const MallocBlock* b = queue_entry.block;
-    const size_t size = queue_entry.size;
-    if (queue_entry.num_deleter_pcs > 0) {
-      TracePrintf(STDERR_FILENO, "Deleted by thread %p\n",
-                  reinterpret_cast<void*>(
-                      PRINTABLE_PTHREAD(queue_entry.deleter_threadid)));
-
-      // We don't want to allocate or deallocate memory here, so we use
-      // placement-new.  It's ok that we don't destroy this, since we're
-      // just going to error-exit below anyway.  Union is for alignment.
-      union { void* alignment; char buf[sizeof(SymbolTable)]; } tablebuf;
-      SymbolTable* symbolization_table = new (tablebuf.buf) SymbolTable;
-      for (int i = 0; i < queue_entry.num_deleter_pcs; i++) {
-        // Symbolizes the previous address of pc because pc may be in the
-        // next function.  This may happen when the function ends with
-        // a call to a function annotated noreturn (e.g. CHECK).
-        char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]);
-        symbolization_table->Add(pc - 1);
-      }
-      if (FLAGS_symbolize_stacktrace)
-        symbolization_table->Symbolize();
-      for (int i = 0; i < queue_entry.num_deleter_pcs; i++) {
-        char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]);
-        TracePrintf(STDERR_FILENO, "    @ %p %s\n",
-                    pc, symbolization_table->GetSymbol(pc - 1));
-      }
-    } else {
-      RAW_LOG(ERROR,
-              "Skipping the printing of the deleter's stack!  Its stack was "
-              "not found; either the corruption occurred too early in "
-              "execution to obtain a stack trace or --max_free_queue_size was "
-              "set to 0.");
-    }
-
-    RAW_LOG(FATAL,
-            "Memory was written to after being freed.  MallocBlock: %p, user "
-            "ptr: %p, size: %zd.  If you can't find the source of the error, "
-            "try using ASan (http://code.google.com/p/address-sanitizer/), "
-            "Valgrind, or Purify, or study the "
-            "output of the deleter's stack printed above.",
-            b, b->data_addr(), size);
-  }
-
-  static MallocBlock* FromRawPointer(void* p) {
-    const size_t data_offset = MallocBlock::data_offset();
-    // Find the header just before client's memory.
-    MallocBlock *mb = reinterpret_cast<MallocBlock *>(
-                reinterpret_cast<char *>(p) - data_offset);
-    // If mb->alloc_type_ is kMagicDeletedSizeT, we're not an ok pointer.
-    if (mb->alloc_type_ == kMagicDeletedSizeT) {
-      RAW_LOG(FATAL, "memory allocation bug: object at %p has been already"
-                     " deallocated; or else a word before the object has been"
-                     " corrupted (memory stomping bug)", p);
-    }
-    // If mb->offset_ is zero (common case), mb is the real header.
-    // If mb->offset_ is non-zero, this block was allocated by debug
-    // memallign implementation, and mb->offset_ is the distance
-    // backwards to the real header from mb, which is a fake header.
-    if (mb->offset_ == 0) {
-      return mb;
-    }
-
-    MallocBlock *main_block = reinterpret_cast<MallocBlock *>(
-      reinterpret_cast<char *>(mb) - mb->offset_);
-
-    if (main_block->offset_ != 0) {
-      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
-              " Need 0 but got %x",
-              (unsigned)(main_block->offset_));
-    }
-    if (main_block >= p) {
-      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
-              " Detected main_block address overflow: %x",
-              (unsigned)(mb->offset_));
-    }
-    if (main_block->size2_addr() < p) {
-      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
-              " It points below it's own main_block: %x",
-              (unsigned)(mb->offset_));
-    }
-
-    return main_block;
-  }
-
-  static const MallocBlock* FromRawPointer(const void* p) {
-    // const-safe version: we just cast about
-    return FromRawPointer(const_cast<void*>(p));
-  }
-
-  void Check(int type) const {
-    alloc_map_lock_.Lock();
-    CheckLocked(type);
-    alloc_map_lock_.Unlock();
-  }
-
-  static bool CheckEverything() {
-    alloc_map_lock_.Lock();
-    if (alloc_map_ != NULL)  alloc_map_->Iterate(CheckCallback, 0);
-    alloc_map_lock_.Unlock();
-    return true;  // if we get here, we're okay
-  }
-
-  static bool MemoryStats(int* blocks, size_t* total,
-                          int histogram[kMallocHistogramSize]) {
-    memset(histogram, 0, kMallocHistogramSize * sizeof(int));
-    alloc_map_lock_.Lock();
-    stats_blocks_ = 0;
-    stats_total_ = 0;
-    stats_histogram_ = histogram;
-    if (alloc_map_ != NULL) alloc_map_->Iterate(StatsCallback, 0);
-    *blocks = stats_blocks_;
-    *total = stats_total_;
-    alloc_map_lock_.Unlock();
-    return true;
-  }
-
- private:  // helpers for CheckEverything and MemoryStats
-
-  static void CheckCallback(const void* ptr, int* type, int dummy) {
-    if ((*type & kDeallocatedTypeBit) == 0) {
-      FromRawPointer(ptr)->CheckLocked(*type);
-    }
-  }
-
-  // Accumulation variables for StatsCallback protected by alloc_map_lock_
-  static int stats_blocks_;
-  static size_t stats_total_;
-  static int* stats_histogram_;
-
-  static void StatsCallback(const void* ptr, int* type, int dummy) {
-    if ((*type & kDeallocatedTypeBit) == 0) {
-      const MallocBlock* b = FromRawPointer(ptr);
-      b->CheckLocked(*type);
-      ++stats_blocks_;
-      size_t mysize = b->size1_;
-      int entry = 0;
-      stats_total_ += mysize;
-      while (mysize) {
-        ++entry;
-        mysize >>= 1;
-      }
-      RAW_CHECK(entry < kMallocHistogramSize,
-                "kMallocHistogramSize should be at least as large as log2 "
-                "of the maximum process memory size");
-      stats_histogram_[entry] += 1;
-    }
-  }
-};
-
-void DanglingWriteChecker() {
-  // Clear out the remaining free queue to check for dangling writes.
-  MallocBlock::ProcessFreeQueue(NULL, 0, 0);
-}
-
-// ========================================================================= //
-
-const size_t MallocBlock::kMagicMalloc;
-const size_t MallocBlock::kMagicMMap;
-
-MallocBlock::AllocMap* MallocBlock::alloc_map_ = NULL;
-SpinLock MallocBlock::alloc_map_lock_(SpinLock::LINKER_INITIALIZED);
-
-FreeQueue<MallocBlockQueueEntry>* MallocBlock::free_queue_ = NULL;
-size_t MallocBlock::free_queue_size_ = 0;
-SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED);
-
-unsigned char MallocBlock::kMagicDeletedBuffer[1024];
-pthread_once_t MallocBlock::deleted_buffer_initialized_ = PTHREAD_ONCE_INIT;
-bool MallocBlock::deleted_buffer_initialized_no_pthreads_ = false;
-
-const char* const MallocBlock::kAllocName[] = {
-  "malloc",
-  "new",
-  "new []",
-  NULL,
-};
-
-const char* const MallocBlock::kDeallocName[] = {
-  "free",
-  "delete",
-  "delete []",
-  NULL,
-};
-
-int MallocBlock::stats_blocks_;
-size_t MallocBlock::stats_total_;
-int* MallocBlock::stats_histogram_;
-
-// ========================================================================= //
-
-// The following cut-down version of printf() avoids
-// using stdio or ostreams.
-// This is to guarantee no recursive calls into
-// the allocator and to bound the stack space consumed.  (The pthread
-// manager thread in linuxthreads has a very small stack,
-// so fprintf can't be called.)
-static void TracePrintf(int fd, const char *fmt, ...) {
-  char buf[64];
-  int i = 0;
-  va_list ap;
-  va_start(ap, fmt);
-  const char *p = fmt;
-  char numbuf[25];
-  if (fd < 0) {
-    return;
-  }
-  numbuf[sizeof(numbuf)-1] = 0;
-  while (*p != '\0') {              // until end of format string
-    char *s = &numbuf[sizeof(numbuf)-1];
-    if (p[0] == '%' && p[1] != 0) {  // handle % formats
-      int64 l = 0;
-      unsigned long base = 0;
-      if (*++p == 's') {                            // %s
-        s = va_arg(ap, char *);
-      } else if (*p == 'l' && p[1] == 'd') {        // %ld
-        l = va_arg(ap, long);
-        base = 10;
-        p++;
-      } else if (*p == 'l' && p[1] == 'u') {        // %lu
-        l = va_arg(ap, unsigned long);
-        base = 10;
-        p++;
-      } else if (*p == 'z' && p[1] == 'u') {        // %zu
-        l = va_arg(ap, size_t);
-        base = 10;
-        p++;
-      } else if (*p == 'u') {                       // %u
-        l = va_arg(ap, unsigned int);
-        base = 10;
-      } else if (*p == 'd') {                       // %d
-        l = va_arg(ap, int);
-        base = 10;
-      } else if (*p == 'p') {                       // %p
-        l = va_arg(ap, intptr_t);
-        base = 16;
-      } else {
-        write(STDERR_FILENO, "Unimplemented TracePrintf format\n", 33);
-        write(STDERR_FILENO, p, 2);
-        write(STDERR_FILENO, "\n", 1);
-        abort();
-      }
-      p++;
-      if (base != 0) {
-        bool minus = (l < 0 && base == 10);
-        uint64 ul = minus? -l : l;
-        do {
-          *--s = "0123456789abcdef"[ul % base];
-          ul /= base;
-        } while (ul != 0);
-        if (base == 16) {
-          *--s = 'x';
-          *--s = '0';
-        } else if (minus) {
-          *--s = '-';
-        }
-      }
-    } else {                        // handle normal characters
-      *--s = *p++;
-    }
-    while (*s != 0) {
-      if (i == sizeof(buf)) {
-        write(fd, buf, i);
-        i = 0;
-      }
-      buf[i++] = *s++;
-    }
-  }
-  if (i != 0) {
-    write(fd, buf, i);
-  }
-  va_end(ap);
-}
-
-// Return the file descriptor we're writing a log to
-static int TraceFd() {
-  static int trace_fd = -1;
-  if (trace_fd == -1) {            // Open the trace file on the first call
-    const char *val = getenv("TCMALLOC_TRACE_FILE");
-    bool fallback_to_stderr = false;
-    if (!val) {
-      val = "/tmp/google.alloc";
-      fallback_to_stderr = true;
-    }
-    trace_fd = open(val, O_CREAT|O_TRUNC|O_WRONLY, 0666);
-    if (trace_fd == -1) {
-      if (fallback_to_stderr) {
-        trace_fd = 2;
-        TracePrintf(trace_fd, "Can't open %s.  Logging to stderr.\n", val);
-      } else {
-        TracePrintf(2, "Can't open %s.  Logging disabled.\n", val);
-      }
-    }
-    // Add a header to the log.
-    TracePrintf(trace_fd, "Trace started: %lu\n",
-                static_cast<unsigned long>(time(NULL)));
-    TracePrintf(trace_fd,
-                "func\tsize\tptr\tthread_id\tstack pcs for tools/symbolize\n");
-  }
-  return trace_fd;
-}
-
-// Print the hex stack dump on a single line.   PCs are separated by tabs.
-static void TraceStack(void) {
-  void *pcs[16];
-  int n = GetStackTrace(pcs, sizeof(pcs)/sizeof(pcs[0]), 0);
-  for (int i = 0; i != n; i++) {
-    TracePrintf(TraceFd(), "\t%p", pcs[i]);
-  }
-}
-
-// This protects MALLOC_TRACE, to make sure its info is atomically written.
-static SpinLock malloc_trace_lock(SpinLock::LINKER_INITIALIZED);
-
-#define MALLOC_TRACE(name, size, addr)                                  \
-  do {                                                                  \
-    if (FLAGS_malloctrace) {                                            \
-      SpinLockHolder l(&malloc_trace_lock);                             \
-      TracePrintf(TraceFd(), "%s\t%" PRIuS "\t%p\t%" GPRIuPTHREAD,      \
-                  name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \
-      TraceStack();                                                     \
-      TracePrintf(TraceFd(), "\n");                                     \
-    }                                                                   \
-  } while (0)
-
-// ========================================================================= //
-
-// Write the characters buf[0, ..., size-1] to
-// the malloc trace buffer.
-// This function is intended for debugging,
-// and is not declared in any header file.
-// You must insert a declaration of it by hand when you need
-// to use it.
-void __malloctrace_write(const char *buf, size_t size) {
-  if (FLAGS_malloctrace) {
-    write(TraceFd(), buf, size);
-  }
-}
-
-// ========================================================================= //
-
-// General debug allocation/deallocation
-
-static inline void* DebugAllocate(size_t size, int type) {
-  MallocBlock* ptr = MallocBlock::Allocate(size, type);
-  if (ptr == NULL)  return NULL;
-  MALLOC_TRACE("malloc", size, ptr->data_addr());
-  return ptr->data_addr();
-}
-
-static inline void DebugDeallocate(void* ptr, int type, size_t given_size) {
-  MALLOC_TRACE("free",
-               (ptr != 0 ? MallocBlock::FromRawPointer(ptr)->data_size() : 0),
-               ptr);
-  if (ptr)  MallocBlock::FromRawPointer(ptr)->Deallocate(type, given_size);
-}
-
-// ========================================================================= //
-
-// The following functions may be called via MallocExtension::instance()
-// for memory verification and statistics.
-class DebugMallocImplementation : public TCMallocImplementation {
- public:
-  virtual bool GetNumericProperty(const char* name, size_t* value) {
-    bool result = TCMallocImplementation::GetNumericProperty(name, value);
-    if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) {
-      // Subtract bytes kept in the free queue
-      size_t qsize = MallocBlock::FreeQueueSize();
-      if (*value >= qsize) {
-        *value -= qsize;
-      }
-    }
-    return result;
-  }
-
-  virtual bool VerifyNewMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType);
-    return true;
-  }
-
-  virtual bool VerifyArrayNewMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType);
-    return true;
-  }
-
-  virtual bool VerifyMallocMemory(const void* p) {
-    if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType);
-    return true;
-  }
-
-  virtual bool VerifyAllMemory() {
-    return MallocBlock::CheckEverything();
-  }
-
-  virtual bool MallocMemoryStats(int* blocks, size_t* total,
-                                 int histogram[kMallocHistogramSize]) {
-    return MallocBlock::MemoryStats(blocks, total, histogram);
-  }
-
-  virtual size_t GetEstimatedAllocatedSize(size_t size) {
-    return size;
-  }
-
-  virtual size_t GetAllocatedSize(const void* p) {
-    if (p) {
-      RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned,
-                "ptr not allocated by tcmalloc");
-      return MallocBlock::FromRawPointer(p)->data_size();
-    }
-    return 0;
-  }
-
-  virtual MallocExtension::Ownership GetOwnership(const void* p) {
-    if (!p) {
-      // nobody owns NULL
-      return MallocExtension::kNotOwned;
-    }
-
-    // FIXME: note that correct GetOwnership should not touch memory
-    // that is not owned by tcmalloc. Main implementation is using
-    // pagemap to discover if page in question is owned by us or
-    // not. But pagemap only has marks for first and last page of
-    // spans.  Note that if p was returned out of our memalign with
-    // big alignment, then it will point outside of marked pages. Also
-    // note that FromRawPointer call below requires touching memory
-    // before pointer in order to handle memalign-ed chunks
-    // (offset_). This leaves us with two options:
-    //
-    // * do FromRawPointer first and have possibility of crashing if
-    //   we're given not owned pointer
-    //
-    // * return incorrect ownership for those large memalign chunks
-    //
-    // I've decided to choose later, which appears to happen rarer and
-    // therefore is arguably a lesser evil
-
-    MallocExtension::Ownership rv = TCMallocImplementation::GetOwnership(p);
-    if (rv != MallocExtension::kOwned) {
-      return rv;
-    }
-
-    const MallocBlock* mb = MallocBlock::FromRawPointer(p);
-    return TCMallocImplementation::GetOwnership(mb);
-  }
-
-  virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
-    static const char* kDebugFreeQueue = "debug.free_queue";
-
-    TCMallocImplementation::GetFreeListSizes(v);
-
-    MallocExtension::FreeListInfo i;
-    i.type = kDebugFreeQueue;
-    i.min_object_size = 0;
-    i.max_object_size = numeric_limits<size_t>::max();
-    i.total_bytes_free = MallocBlock::FreeQueueSize();
-    v->push_back(i);
-  }
-
- };
-
-static union {
-  char chars[sizeof(DebugMallocImplementation)];
-  void *ptr;
-} debug_malloc_implementation_space;
-
-REGISTER_MODULE_INITIALIZER(debugallocation, {
-#if (__cplusplus >= 201103L)
-    COMPILE_ASSERT(alignof(debug_malloc_implementation_space) >= alignof(DebugMallocImplementation),
-                   debug_malloc_implementation_space_is_not_properly_aligned);
-#endif
-  // Either we or valgrind will control memory management.  We
-  // register our extension if we're the winner. Otherwise let
-  // Valgrind use its own malloc (so don't register our extension).
-  if (!RunningOnValgrind()) {
-    DebugMallocImplementation *impl = new (debug_malloc_implementation_space.chars) DebugMallocImplementation();
-    MallocExtension::Register(impl);
-  }
-});
-
-REGISTER_MODULE_DESTRUCTOR(debugallocation, {
-  if (!RunningOnValgrind()) {
-    // When the program exits, check all blocks still in the free
-    // queue for corruption.
-    DanglingWriteChecker();
-  }
-});
-
-// ========================================================================= //
-
-struct debug_alloc_retry_data {
-  size_t size;
-  int new_type;
-};
-
-static void *retry_debug_allocate(void *arg) {
-  debug_alloc_retry_data *data = static_cast<debug_alloc_retry_data *>(arg);
-  return DebugAllocate(data->size, data->new_type);
-}
-
-// This is mostly the same a cpp_alloc in tcmalloc.cc.
-// TODO(csilvers): change Allocate() above to call cpp_alloc, so we
-// don't have to reproduce the logic here.  To make tc_new_mode work
-// properly, I think we'll need to separate out the logic of throwing
-// from the logic of calling the new-handler.
-inline void* debug_cpp_alloc(size_t size, int new_type, bool nothrow) {
-  void* p = DebugAllocate(size, new_type);
-  if (p != NULL) {
-    return p;
-  }
-  struct debug_alloc_retry_data data;
-  data.size = size;
-  data.new_type = new_type;
-  return handle_oom(retry_debug_allocate, &data,
-                    true, nothrow);
-}
-
-inline void* do_debug_malloc_or_debug_cpp_alloc(size_t size) {
-  void* p = DebugAllocate(size, MallocBlock::kMallocType);
-  if (p != NULL) {
-    return p;
-  }
-  struct debug_alloc_retry_data data;
-  data.size = size;
-  data.new_type = MallocBlock::kMallocType;
-  return handle_oom(retry_debug_allocate, &data,
-                    false, true);
-}
-
-// Exported routines
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyMalloc(size);
-  }
-  void* ptr = do_debug_malloc_or_debug_cpp_alloc(size);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyFree(ptr);
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, size);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t count, size_t size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyCalloc(count, size);
-  }
-  // Overflow check
-  const size_t total_size = count * size;
-  if (size != 0 && total_size / size != count) return NULL;
-
-  void* block = do_debug_malloc_or_debug_cpp_alloc(total_size);
-  MallocHook::InvokeNewHook(block, total_size);
-  if (block)  memset(block, 0, total_size);
-  return block;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyFree(ptr);
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    return tcmalloc::EmergencyRealloc(ptr, size);
-  }
-  if (ptr == NULL) {
-    ptr = do_debug_malloc_or_debug_cpp_alloc(size);
-    MallocHook::InvokeNewHook(ptr, size);
-    return ptr;
-  }
-  if (size == 0) {
-    MallocHook::InvokeDeleteHook(ptr);
-    DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-    return NULL;
-  }
-  MallocBlock* old = MallocBlock::FromRawPointer(ptr);
-  old->Check(MallocBlock::kMallocType);
-  MallocBlock* p = MallocBlock::Allocate(size, MallocBlock::kMallocType);
-
-  // If realloc fails we are to leave the old block untouched and
-  // return null
-  if (p == NULL)  return NULL;
-
-  // if ptr was allocated via memalign, then old->data_size() is not
-  // start of user data. So we must be careful to copy only user-data
-  char *old_begin = (char *)old->data_addr();
-  char *old_end = old_begin + old->data_size();
-
-  ssize_t old_ssize = old_end - (char *)ptr;
-  CHECK_CONDITION(old_ssize >= 0);
-
-  size_t old_size = (size_t)old_ssize;
-  CHECK_CONDITION(old_size <= old->data_size());
-
-  memcpy(p->data_addr(), ptr, (old_size < size) ? old_size : size);
-  MallocHook::InvokeDeleteHook(ptr);
-  MallocHook::InvokeNewHook(p->data_addr(), size);
-  DebugDeallocate(ptr, MallocBlock::kMallocType, 0);
-  MALLOC_TRACE("realloc", p->data_size(), p->data_addr());
-  return p->data_addr();
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false);
-  MallocHook::InvokeNewHook(ptr, size);
-  if (ptr == NULL) {
-    RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new failed.", size);
-  }
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, true);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw() {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, size);
-}
-
-// Some STL implementations explicitly invoke this.
-// It is completely equivalent to a normal delete (delete never throws).
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false);
-  MallocHook::InvokeNewHook(ptr, size);
-  if (ptr == NULL) {
-    RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new[] failed.", size);
-  }
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    PERFTOOLS_THROW {
-  void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, true);
-  MallocHook::InvokeNewHook(ptr, size);
-  return ptr;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, 0);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw() {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, size);
-}
-
-// Some STL implementations explicitly invoke this.
-// It is completely equivalent to a normal delete (delete never throws).
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(p);
-  DebugDeallocate(p, MallocBlock::kArrayNewType, 0);
-}
-
-// This is mostly the same as do_memalign in tcmalloc.cc.
-static void *do_debug_memalign(size_t alignment, size_t size) {
-  // Allocate >= size bytes aligned on "alignment" boundary
-  // "alignment" is a power of two.
-  void *p = 0;
-  RAW_CHECK((alignment & (alignment-1)) == 0, "must be power of two");
-  const size_t data_offset = MallocBlock::data_offset();
-  // Allocate "alignment-1" extra bytes to ensure alignment is possible, and
-  // a further data_offset bytes for an additional fake header.
-  size_t extra_bytes = data_offset + alignment - 1;
-  if (size + extra_bytes < size) return NULL;         // Overflow
-  p = DebugAllocate(size + extra_bytes, MallocBlock::kMallocType);
-  if (p != 0) {
-    intptr_t orig_p = reinterpret_cast<intptr_t>(p);
-    // Leave data_offset bytes for fake header, and round up to meet
-    // alignment.
-    p = reinterpret_cast<void *>(RoundUp(orig_p + data_offset, alignment));
-    // Create a fake header block with an offset_ that points back to the
-    // real header.  FromRawPointer uses this value.
-    MallocBlock *fake_hdr = reinterpret_cast<MallocBlock *>(
-                reinterpret_cast<char *>(p) - data_offset);
-    // offset_ is distance between real and fake headers.
-    // p is now end of fake header (beginning of client area),
-    // and orig_p is the end of the real header, so offset_
-    // is their difference.
-    //
-    // Note that other fields of fake_hdr are initialized with
-    // kMagicUninitializedByte
-    fake_hdr->set_offset(reinterpret_cast<intptr_t>(p) - orig_p);
-  }
-  return p;
-}
-
-struct memalign_retry_data {
-  size_t align;
-  size_t size;
-};
-
-static void *retry_debug_memalign(void *arg) {
-  memalign_retry_data *data = static_cast<memalign_retry_data *>(arg);
-  return do_debug_memalign(data->align, data->size);
-}
-
-inline void* do_debug_memalign_or_debug_cpp_memalign(size_t align,
-                                                     size_t size) {
-  void* p = do_debug_memalign(align, size);
-  if (p != NULL) {
-    return p;
-  }
-
-  struct memalign_retry_data data;
-  data.align = align;
-  data.size = size;
-  return handle_oom(retry_debug_memalign, &data,
-                    false, true);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, size_t size) PERFTOOLS_THROW {
-  void *p = do_debug_memalign_or_debug_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-// Implementation taken from tcmalloc/tcmalloc.cc
-extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(void** result_ptr, size_t align, size_t size)
-    PERFTOOLS_THROW {
-  if (((align % sizeof(void*)) != 0) ||
-      ((align & (align - 1)) != 0) ||
-      (align == 0)) {
-    return EINVAL;
-  }
-
-  void* result = do_debug_memalign_or_debug_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  if (result == NULL) {
-    return ENOMEM;
-  } else {
-    *result_ptr = result;
-    return 0;
-  }
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW {
-  // Allocate >= size bytes starting on a page boundary
-  void *p = do_debug_memalign_or_debug_cpp_memalign(getpagesize(), size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW {
-  // Round size up to a multiple of pages
-  // then allocate memory on a page boundary
-  int pagesize = getpagesize();
-  size = RoundUp(size, pagesize);
-  if (size == 0) {     // pvalloc(0) should allocate one page, according to
-    size = pagesize;   // http://man.free4web.biz/man3/libmpatrol.3.html
-  }
-  void *p = do_debug_memalign_or_debug_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-// malloc_stats just falls through to the base implementation.
-extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW {
-  do_malloc_stats();
-}
-
-extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW {
-  return do_mallopt(cmd, value);
-}
-
-#ifdef HAVE_STRUCT_MALLINFO
-extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW {
-  return do_mallinfo();
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW {
-  return MallocExtension::instance()->GetAllocatedSize(ptr);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW {
-  void* result = DebugAllocate(size, MallocBlock::kMallocType);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-#pragma GCC diagnostic pop
diff --git a/contrib/libtcmalloc/src/getenv_safe.h b/contrib/libtcmalloc/src/getenv_safe.h
deleted file mode 100644
index 3b9f4dbbcb2..00000000000
--- a/contrib/libtcmalloc/src/getenv_safe.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
- * Copyright (c) 2014, gperftools Contributors
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GETENV_SAFE_H
-#define GETENV_SAFE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* 
- * This getenv function is safe to call before the C runtime is initialized.
- * On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
- * /proc/self/environ instead calling getenv().  It's intended to be used in
- * routines that run before main(), when the state required for getenv() may
- * not be set up yet.  In particular, errno isn't set up until relatively late
- * (after the pthreads library has a chance to make it threadsafe), and
- * getenv() doesn't work until then.
- * On some platforms, this call will utilize the same, static buffer for
- * repeated GetenvBeforeMain() calls. Callers should not expect pointers from
- * this routine to be long lived.
- * Note that on unix, /proc only has the environment at the time the
- * application was started, so this routine ignores setenv() calls/etc.  Also
- * note it only reads the first 16K of the environment.
- * 
- * NOTE: this is version of GetenvBeforeMain that's usable from
- * C. Implementation is in sysinfo.cc
- */
-const char* TCMallocGetenvSafe(const char* name);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/contrib/libtcmalloc/src/getpc.h b/contrib/libtcmalloc/src/getpc.h
deleted file mode 100644
index 163873eabc6..00000000000
--- a/contrib/libtcmalloc/src/getpc.h
+++ /dev/null
@@ -1,192 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein
-//
-// This is an internal header file used by profiler.cc.  It defines
-// the single (inline) function GetPC.  GetPC is used in a signal
-// handler to figure out the instruction that was being executed when
-// the signal-handler was triggered.
-//
-// To get this, we use the ucontext_t argument to the signal-handler
-// callback, which holds the full context of what was going on when
-// the signal triggered.  How to get from a ucontext_t to a Program
-// Counter is OS-dependent.
-
-#ifndef BASE_GETPC_H_
-#define BASE_GETPC_H_
-
-#include "config.h"
-
-// On many linux systems, we may need _GNU_SOURCE to get access to
-// the defined constants that define the register we want to see (eg
-// REG_EIP).  Note this #define must come first!
-#define _GNU_SOURCE 1
-// If #define _GNU_SOURCE causes problems, this might work instead.
-// It will cause problems for FreeBSD though!, because it turns off
-// the needed __BSD_VISIBLE.
-//#define _XOPEN_SOURCE 500
-
-#include <string.h>         // for memcmp
-#if defined(HAVE_SYS_UCONTEXT_H)
-#include <sys/ucontext.h>
-#elif defined(HAVE_UCONTEXT_H)
-#include <ucontext.h>       // for ucontext_t (and also mcontext_t)
-#elif defined(HAVE_CYGWIN_SIGNAL_H)
-#include <cygwin/signal.h>
-typedef ucontext ucontext_t;
-#endif
-
-
-// Take the example where function Foo() calls function Bar().  For
-// many architectures, Bar() is responsible for setting up and tearing
-// down its own stack frame.  In that case, it's possible for the
-// interrupt to happen when execution is in Bar(), but the stack frame
-// is not properly set up (either before it's done being set up, or
-// after it's been torn down but before Bar() returns).  In those
-// cases, the stack trace cannot see the caller function anymore.
-//
-// GetPC can try to identify this situation, on architectures where it
-// might occur, and unwind the current function call in that case to
-// avoid false edges in the profile graph (that is, edges that appear
-// to show a call skipping over a function).  To do this, we hard-code
-// in the asm instructions we might see when setting up or tearing
-// down a stack frame.
-//
-// This is difficult to get right: the instructions depend on the
-// processor, the compiler ABI, and even the optimization level.  This
-// is a best effort patch -- if we fail to detect such a situation, or
-// mess up the PC, nothing happens; the returned PC is not used for
-// any further processing.
-struct CallUnrollInfo {
-  // Offset from (e)ip register where this instruction sequence
-  // should be matched. Interpreted as bytes. Offset 0 is the next
-  // instruction to execute. Be extra careful with negative offsets in
-  // architectures of variable instruction length (like x86) - it is
-  // not that easy as taking an offset to step one instruction back!
-  int pc_offset;
-  // The actual instruction bytes. Feel free to make it larger if you
-  // need a longer sequence.
-  unsigned char ins[16];
-  // How many bytes to match from ins array?
-  int ins_size;
-  // The offset from the stack pointer (e)sp where to look for the
-  // call return address. Interpreted as bytes.
-  int return_sp_offset;
-};
-
-
-// The dereferences needed to get the PC from a struct ucontext were
-// determined at configure time, and stored in the macro
-// PC_FROM_UCONTEXT in config.h.  The only thing we need to do here,
-// then, is to do the magic call-unrolling for systems that support it.
-
-// -- Special case 1: linux x86, for which we have CallUnrollInfo
-#if defined(__linux) && defined(__i386) && defined(__GNUC__)
-static const CallUnrollInfo callunrollinfo[] = {
-  // Entry to a function:  push %ebp;  mov  %esp,%ebp
-  // Top-of-stack contains the caller IP.
-  { 0,
-    {0x55, 0x89, 0xe5}, 3,
-    0
-  },
-  // Entry to a function, second instruction:  push %ebp;  mov  %esp,%ebp
-  // Top-of-stack contains the old frame, caller IP is +4.
-  { -1,
-    {0x55, 0x89, 0xe5}, 3,
-    4
-  },
-  // Return from a function: RET.
-  // Top-of-stack contains the caller IP.
-  { 0,
-    {0xc3}, 1,
-    0
-  }
-};
-
-inline void* GetPC(const ucontext_t& signal_ucontext) {
-  // See comment above struct CallUnrollInfo.  Only try instruction
-  // flow matching if both eip and esp looks reasonable.
-  const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP];
-  const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP];
-  if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
-      (esp & 0xffff0000) != 0) {
-    char* eip_char = reinterpret_cast<char*>(eip);
-    for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
-      if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
-                  callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
-        // We have a match.
-        void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
-        return *retaddr;
-      }
-    }
-  }
-  return (void*)eip;
-}
-
-// Special case #2: Windows, which has to do something totally different.
-#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
-// If this is ever implemented, probably the way to do it is to have
-// profiler.cc use a high-precision timer via timeSetEvent:
-//    http://msdn2.microsoft.com/en-us/library/ms712713.aspx
-// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
-// The callback function would be something like prof_handler, but
-// alas the arguments are different: no ucontext_t!  I don't know
-// how we'd get the PC (using StackWalk64?)
-//    http://msdn2.microsoft.com/en-us/library/ms680650.aspx
-
-#include "base/logging.h"   // for RAW_LOG
-#ifndef HAVE_CYGWIN_SIGNAL_H
-typedef int ucontext_t;
-#endif
-
-inline void* GetPC(const struct ucontext_t& signal_ucontext) {
-  RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
-  return NULL;
-}
-
-// Normal cases.  If this doesn't compile, it's probably because
-// PC_FROM_UCONTEXT is the empty string.  You need to figure out
-// the right value for your system, and add it to the list in
-// configure.ac (or set it manually in your config.h).
-#else
-inline void* GetPC(const ucontext_t& signal_ucontext) {
-#if defined(__s390__) && !defined(__s390x__)
-  // Mask out the AMODE31 bit from the PC recorded in the context.
-  return (void*)((unsigned long)signal_ucontext.PC_FROM_UCONTEXT & 0x7fffffffUL);
-#else
-  return (void*)signal_ucontext.PC_FROM_UCONTEXT;   // defined in config.h
-#endif
-}
-
-#endif
-
-#endif  // BASE_GETPC_H_
diff --git a/contrib/libtcmalloc/src/heap-checker-bcad.cc b/contrib/libtcmalloc/src/heap-checker-bcad.cc
deleted file mode 100644
index 00efdb7cfd4..00000000000
--- a/contrib/libtcmalloc/src/heap-checker-bcad.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Maxim Lifantsev
-//
-// A file to ensure that components of heap leak checker run before
-// all global object constructors and after all global object
-// destructors.
-//
-// This file must be the last library any binary links against.
-// Otherwise, the heap checker may not be able to run early enough to
-// catalog all the global objects in your program.  If this happens,
-// and later in the program you allocate memory and have one of these
-// "uncataloged" global objects point to it, the heap checker will
-// consider that allocation to be a leak, even though it's not (since
-// the allocated object is reachable from global data and hence "live").
-
-#include <stdlib.h>      // for abort()
-#include <gperftools/malloc_extension.h>
-
-// A dummy variable to refer from heap-checker.cc.  This is to make
-// sure this file is not optimized out by the linker.
-bool heap_leak_checker_bcad_variable;
-
-extern void HeapLeakChecker_AfterDestructors();  // in heap-checker.cc
-
-// A helper class to ensure that some components of heap leak checking
-// can happen before construction and after destruction
-// of all global/static objects.
-class HeapLeakCheckerGlobalPrePost {
- public:
-  HeapLeakCheckerGlobalPrePost() {
-    if (count_ == 0) {
-      // The 'new int' will ensure that we have run an initial malloc
-      // hook, which will set up the heap checker via
-      // MallocHook_InitAtFirstAllocation_HeapLeakChecker.  See malloc_hook.cc.
-      // This is done in this roundabout fashion in order to avoid self-deadlock
-      // if we directly called HeapLeakChecker_BeforeConstructors here.
-      delete new int;
-      // This needs to be called before the first allocation of an STL
-      // object, but after libc is done setting up threads (because it
-      // calls setenv, which requires a thread-aware errno).  By
-      // putting it here, we hope it's the first bit of code executed
-      // after the libc global-constructor code.
-      MallocExtension::Initialize();
-    }
-    ++count_;
-  }
-  ~HeapLeakCheckerGlobalPrePost() {
-    if (count_ <= 0)  abort();
-    --count_;
-    if (count_ == 0)  HeapLeakChecker_AfterDestructors();
-  }
- private:
-  // Counter of constructions/destructions of objects of this class
-  // (just in case there are more than one of them).
-  static int count_;
-};
-
-int HeapLeakCheckerGlobalPrePost::count_ = 0;
-
-// The early-construction/late-destruction global object.
-static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post;
diff --git a/contrib/libtcmalloc/src/heap-checker.cc b/contrib/libtcmalloc/src/heap-checker.cc
deleted file mode 100644
index 9c82dea08e4..00000000000
--- a/contrib/libtcmalloc/src/heap-checker.cc
+++ /dev/null
@@ -1,2388 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Maxim Lifantsev
-//
-
-#include "config.h"
-
-#include <fcntl.h>    // for O_RDONLY (we use syscall to do actual reads)
-#include <string.h>
-#include <errno.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <time.h>
-#include <assert.h>
-
-#if defined(HAVE_LINUX_PTRACE_H)
-#include <linux/ptrace.h>
-#endif
-#ifdef HAVE_SYS_SYSCALL_H
-#include <sys/syscall.h>
-#endif
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
-#include <wtypes.h>
-#include <winbase.h>
-#undef ERROR     // windows defines these as macros, which can cause trouble
-#undef max
-#undef min
-#endif
-
-#include <string>
-#include <vector>
-#include <map>
-#include <set>
-#include <algorithm>
-#include <functional>
-
-#include <gperftools/heap-checker.h>
-
-#include "base/basictypes.h"
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include <gperftools/stacktrace.h>
-#include "base/commandlineflags.h"
-#include "base/elfcore.h"              // for i386_regs
-#include "base/thread_lister.h"
-#include "heap-profile-table.h"
-#include "base/low_level_alloc.h"
-#include "malloc_hook-inl.h"
-#include <gperftools/malloc_hook.h>
-#include <gperftools/malloc_extension.h>
-#include "maybe_threads.h"
-#include "memory_region_map.h"
-#include "base/spinlock.h"
-#include "base/sysinfo.h"
-#include "base/stl_allocator.h"
-
-using std::string;
-using std::basic_string;
-using std::pair;
-using std::map;
-using std::set;
-using std::vector;
-using std::swap;
-using std::make_pair;
-using std::min;
-using std::max;
-using std::less;
-using std::char_traits;
-
-// If current process is being ptrace()d, 'TracerPid' in /proc/self/status
-// will be non-zero.
-static bool IsDebuggerAttached(void) {    // only works under linux, probably
-  char buf[256];   // TracerPid comes relatively earlier in status output
-  int fd = open("/proc/self/status", O_RDONLY);
-  if (fd == -1) {
-    return false;  // Can't tell for sure.
-  }
-  const int len = read(fd, buf, sizeof(buf));
-  bool rc = false;
-  if (len > 0) {
-    const char *const kTracerPid = "TracerPid:\t";
-    buf[len - 1] = '\0';
-    const char *p = strstr(buf, kTracerPid);
-    if (p != NULL) {
-      rc = (strncmp(p + strlen(kTracerPid), "0\n", 2) != 0);
-    }
-  }
-  close(fd);
-  return rc;
-}
-
-// This is the default if you don't link in -lprofiler
-extern "C" {
-ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads();
-bool ProfilingIsEnabledForAllThreads() { return false; }
-}
-
-//----------------------------------------------------------------------
-// Flags that control heap-checking
-//----------------------------------------------------------------------
-
-DEFINE_string(heap_check,
-              EnvToString("HEAPCHECK", ""),
-              "The heap leak checking to be done over the whole executable: "
-              "\"minimal\", \"normal\", \"strict\", "
-              "\"draconian\", \"as-is\", and \"local\" "
-              " or the empty string are the supported choices. "
-              "(See HeapLeakChecker_InternalInitStart for details.)");
-
-DEFINE_bool(heap_check_report, true, "Obsolete");
-
-DEFINE_bool(heap_check_before_constructors,
-            true,
-            "deprecated; pretty much always true now");
-
-DEFINE_bool(heap_check_after_destructors,
-            EnvToBool("HEAP_CHECK_AFTER_DESTRUCTORS", false),
-            "If overall heap check is to end after global destructors "
-            "or right after all REGISTER_HEAPCHECK_CLEANUP's");
-
-DEFINE_bool(heap_check_strict_check, true, "Obsolete");
-
-DEFINE_bool(heap_check_ignore_global_live,
-            EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true),
-            "If overall heap check is to ignore heap objects reachable "
-            "from the global data");
-
-DEFINE_bool(heap_check_identify_leaks,
-            EnvToBool("HEAP_CHECK_IDENTIFY_LEAKS", false),
-            "If heap check should generate the addresses of the leaked "
-            "objects in the memory leak profiles.  This may be useful "
-            "in tracking down leaks where only a small fraction of "
-            "objects allocated at the same stack trace are leaked.");
-
-DEFINE_bool(heap_check_ignore_thread_live,
-            EnvToBool("HEAP_CHECK_IGNORE_THREAD_LIVE", true),
-            "If set to true, objects reachable from thread stacks "
-            "and registers are not reported as leaks");
-
-DEFINE_bool(heap_check_test_pointer_alignment,
-            EnvToBool("HEAP_CHECK_TEST_POINTER_ALIGNMENT", false),
-            "Set to true to check if the found leak can be due to "
-            "use of unaligned pointers");
-
-// Alignment at which all pointers in memory are supposed to be located;
-// use 1 if any alignment is ok.
-// heap_check_test_pointer_alignment flag guides if we try the value of 1.
-// The larger it can be, the lesser is the chance of missing real leaks.
-static const size_t kPointerSourceAlignment = sizeof(void*);
-DEFINE_int32(heap_check_pointer_source_alignment,
-	     EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT",
-                      kPointerSourceAlignment),
-             "Alignment at which all pointers in memory are supposed to be "
-             "located.  Use 1 if any alignment is ok.");
-
-// A reasonable default to handle pointers inside of typical class objects:
-// Too low and we won't be able to traverse pointers to normally-used
-// nested objects and base parts of multiple-inherited objects.
-// Too high and it will both slow down leak checking (FindInsideAlloc
-// in HaveOnHeapLocked will get slower when there are large on-heap objects)
-// and make it probabilistically more likely to miss leaks
-// of large-sized objects.
-static const int64 kHeapCheckMaxPointerOffset = 1024;
-DEFINE_int64(heap_check_max_pointer_offset,
-	     EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET",
-                      kHeapCheckMaxPointerOffset),
-             "Largest pointer offset for which we traverse "
-             "pointers going inside of heap allocated objects. "
-             "Set to -1 to use the actual largest heap object size.");
-
-DEFINE_bool(heap_check_run_under_gdb,
-            EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false),
-            "If false, turns off heap-checking library when running under gdb "
-            "(normally, set to 'true' only when debugging the heap-checker)");
-
-DEFINE_int32(heap_check_delay_seconds, 0,
-             "Number of seconds to delay on-exit heap checking."
-             " If you set this flag,"
-             " you may also want to set exit_timeout_seconds in order to"
-             " avoid exit timeouts.\n"
-             "NOTE: This flag is to be used only to help diagnose issues"
-             " where it is suspected that the heap checker is reporting"
-             " false leaks that will disappear if the heap checker delays"
-             " its checks. Report any such issues to the heap-checker"
-             " maintainer(s).");
-
-//----------------------------------------------------------------------
-
-DEFINE_string(heap_profile_pprof,
-              EnvToString("PPROF_PATH", "pprof"),
-              "OBSOLETE; not used");
-
-DEFINE_string(heap_check_dump_directory,
-              EnvToString("HEAP_CHECK_DUMP_DIRECTORY", "/tmp"),
-              "Directory to put heap-checker leak dump information");
-
-
-//----------------------------------------------------------------------
-// HeapLeakChecker global data
-//----------------------------------------------------------------------
-
-// Global lock for all the global data of this module.
-static SpinLock heap_checker_lock(SpinLock::LINKER_INITIALIZED);
-
-//----------------------------------------------------------------------
-
-// Heap profile prefix for leak checking profiles.
-// Gets assigned once when leak checking is turned on, then never modified.
-static const string* profile_name_prefix = NULL;
-
-// Whole-program heap leak checker.
-// Gets assigned once when leak checking is turned on,
-// then main_heap_checker is never deleted.
-static HeapLeakChecker* main_heap_checker = NULL;
-
-// Whether we will use main_heap_checker to do a check at program exit
-// automatically. In any case user can ask for more checks on main_heap_checker
-// via GlobalChecker().
-static bool do_main_heap_check = false;
-
-// The heap profile we use to collect info about the heap.
-// This is created in HeapLeakChecker::BeforeConstructorsLocked
-// together with setting heap_checker_on (below) to true
-// and registering our new/delete malloc hooks;
-// similarly all are unset in HeapLeakChecker::TurnItselfOffLocked.
-static HeapProfileTable* heap_profile = NULL;
-
-// If we are doing (or going to do) any kind of heap-checking.
-static bool heap_checker_on = false;
-
-// pid of the process that does whole-program heap leak checking
-static pid_t heap_checker_pid = 0;
-
-// If we did heap profiling during global constructors execution
-static bool constructor_heap_profiling = false;
-
-// RAW_VLOG level we dump key INFO messages at.  If you want to turn
-// off these messages, set the environment variable PERFTOOLS_VERBOSE=-1.
-static const int heap_checker_info_level = 0;
-
-//----------------------------------------------------------------------
-// HeapLeakChecker's own memory allocator that is
-// independent of the normal program allocator.
-//----------------------------------------------------------------------
-
-// Wrapper of LowLevelAlloc for STL_Allocator and direct use.
-// We always access this class under held heap_checker_lock,
-// this allows us to in particular protect the period when threads are stopped
-// at random spots with TCMalloc_ListAllProcessThreads by heap_checker_lock,
-// w/o worrying about the lock in LowLevelAlloc::Arena.
-// We rely on the fact that we use an own arena with an own lock here.
-class HeapLeakChecker::Allocator {
- public:
-  static void Init() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    RAW_DCHECK(arena_ == NULL, "");
-    arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
-  }
-  static void Shutdown() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    if (!LowLevelAlloc::DeleteArena(arena_)  ||  alloc_count_ != 0) {
-      RAW_LOG(FATAL, "Internal heap checker leak of %d objects", alloc_count_);
-    }
-  }
-  static int alloc_count() {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    return alloc_count_;
-  }
-  static void* Allocate(size_t n) {
-    RAW_DCHECK(arena_  &&  heap_checker_lock.IsHeld(), "");
-    void* p = LowLevelAlloc::AllocWithArena(n, arena_);
-    if (p) alloc_count_ += 1;
-    return p;
-  }
-  static void Free(void* p) {
-    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-    if (p) alloc_count_ -= 1;
-    LowLevelAlloc::Free(p);
-  }
-  static void Free(void* p, size_t /* n */) {
-    Free(p);
-  }
-  // destruct, free, and make *p to be NULL
-  template<typename T> static void DeleteAndNull(T** p) {
-    (*p)->~T();
-    Free(*p);
-    *p = NULL;
-  }
-  template<typename T> static void DeleteAndNullIfNot(T** p) {
-    if (*p != NULL) DeleteAndNull(p);
-  }
- private:
-  static LowLevelAlloc::Arena* arena_;
-  static int alloc_count_;
-};
-
-LowLevelAlloc::Arena* HeapLeakChecker::Allocator::arena_ = NULL;
-int HeapLeakChecker::Allocator::alloc_count_ = 0;
-
-//----------------------------------------------------------------------
-// HeapLeakChecker live object tracking components
-//----------------------------------------------------------------------
-
-// Cases of live object placement we distinguish
-enum ObjectPlacement {
-  MUST_BE_ON_HEAP,   // Must point to a live object of the matching size in the
-                     // heap_profile map of the heap when we get to it
-  IGNORED_ON_HEAP,   // Is a live (ignored) object on heap
-  MAYBE_LIVE,        // Is a piece of writable memory from /proc/self/maps
-  IN_GLOBAL_DATA,    // Is part of global data region of the executable
-  THREAD_DATA,       // Part of a thread stack and a thread descriptor with TLS
-  THREAD_REGISTERS,  // Values in registers of some thread
-};
-
-// Information about an allocated object
-struct AllocObject {
-  const void* ptr;        // the object
-  uintptr_t size;         // its size
-  ObjectPlacement place;  // where ptr points to
-
-  AllocObject(const void* p, size_t s, ObjectPlacement l)
-    : ptr(p), size(s), place(l) { }
-};
-
-// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject
-// Key is the object's address; value is its size.
-typedef map<uintptr_t, size_t, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, size_t>,
-                          HeapLeakChecker::Allocator>
-           > IgnoredObjectsMap;
-static IgnoredObjectsMap* ignored_objects = NULL;
-
-// All objects (memory ranges) that we consider to be the sources of pointers
-// to live (not leaked) objects.
-// At different times this holds (what can be reached from) global data regions
-// and the objects we've been told to ignore.
-// For any AllocObject::ptr "live_objects" is supposed to contain at most one
-// record at any time. We maintain this by checking with the heap_profile map
-// of the heap and removing the live heap objects we've handled from it.
-// This vector is maintained as a stack and the frontier of reachable
-// live heap objects in our flood traversal of them.
-typedef vector<AllocObject,
-               STL_Allocator<AllocObject, HeapLeakChecker::Allocator>
-              > LiveObjectsStack;
-static LiveObjectsStack* live_objects = NULL;
-
-// A special string type that uses my allocator
-typedef basic_string<char, char_traits<char>,
-                     STL_Allocator<char, HeapLeakChecker::Allocator>
-                    > HCL_string;
-
-// A placeholder to fill-in the starting values for live_objects
-// for each library so we can keep the library-name association for logging.
-typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
-            STL_Allocator<pair<const HCL_string, LiveObjectsStack>,
-                          HeapLeakChecker::Allocator>
-           > LibraryLiveObjectsStacks;
-static LibraryLiveObjectsStacks* library_live_objects = NULL;
-
-// Value stored in the map of disabled address ranges;
-// its key is the end of the address range.
-// We'll ignore allocations with a return address in a disabled range
-// if the address occurs at 'max_depth' or less in the stack trace.
-struct HeapLeakChecker::RangeValue {
-  uintptr_t start_address;  // the start of the range
-  int       max_depth;      // the maximal stack depth to disable at
-};
-typedef map<uintptr_t, HeapLeakChecker::RangeValue, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, HeapLeakChecker::RangeValue>,
-                          HeapLeakChecker::Allocator>
-           > DisabledRangeMap;
-// The disabled program counter address ranges for profile dumping
-// that are registered with HeapLeakChecker::DisableChecksFromToLocked.
-static DisabledRangeMap* disabled_ranges = NULL;
-
-// Set of stack tops.
-// These are used to consider live only appropriate chunks of the memory areas
-// that are used for stacks (and maybe thread-specific data as well)
-// so that we do not treat pointers from outdated stack frames as live.
-typedef set<uintptr_t, less<uintptr_t>,
-            STL_Allocator<uintptr_t, HeapLeakChecker::Allocator>
-           > StackTopSet;
-static StackTopSet* stack_tops = NULL;
-
-// A map of ranges of code addresses for the system libraries
-// that can mmap/mremap/sbrk-allocate memory regions for stacks
-// and thread-local storage that we want to consider as live global data.
-// Maps from the end address to the start address.
-typedef map<uintptr_t, uintptr_t, less<uintptr_t>,
-            STL_Allocator<pair<const uintptr_t, uintptr_t>,
-                          HeapLeakChecker::Allocator>
-           > GlobalRegionCallerRangeMap;
-static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;
-
-// TODO(maxim): make our big data structs into own modules
-
-// Disabler is implemented by keeping track of a per-thread count
-// of active Disabler objects.  Any objects allocated while the
-// count > 0 are not reported.
-
-#ifdef HAVE_TLS
-
-static __thread int thread_disable_counter
-// The "inital exec" model is faster than the default TLS model, at
-// the cost you can't dlopen this library.  But dlopen on heap-checker
-// doesn't work anyway -- it must run before main -- so this is a good
-// trade-off.
-# ifdef HAVE___ATTRIBUTE__
-   __attribute__ ((tls_model ("initial-exec")))
-# endif
-    ;
-inline int get_thread_disable_counter() {
-  return thread_disable_counter;
-}
-inline void set_thread_disable_counter(int value) {
-  thread_disable_counter = value;
-}
-
-#else  // #ifdef HAVE_TLS
-
-static pthread_key_t thread_disable_counter_key;
-static int main_thread_counter;   // storage for use before main()
-static bool use_main_thread_counter = true;
-
-// TODO(csilvers): this is called from NewHook, in the middle of malloc().
-// If perftools_pthread_getspecific calls malloc, that will lead to an
-// infinite loop.  I don't know how to fix that, so I hope it never happens!
-inline int get_thread_disable_counter() {
-  if (use_main_thread_counter)  // means we're running really early
-    return main_thread_counter;
-  void* p = perftools_pthread_getspecific(thread_disable_counter_key);
-  return (intptr_t)p;   // kinda evil: store the counter directly in the void*
-}
-
-inline void set_thread_disable_counter(int value) {
-  if (use_main_thread_counter) {   // means we're running really early
-    main_thread_counter = value;
-    return;
-  }
-  intptr_t pointer_sized_value = value;
-  // kinda evil: store the counter directly in the void*
-  void* p = (void*)pointer_sized_value;
-  // NOTE: this may call malloc, which will call NewHook which will call
-  // get_thread_disable_counter() which will call pthread_getspecific().  I
-  // don't know if anything bad can happen if we call getspecific() in the
-  // middle of a setspecific() call.  It seems to work ok in practice...
-  perftools_pthread_setspecific(thread_disable_counter_key, p);
-}
-
-// The idea here is that this initializer will run pretty late: after
-// pthreads have been totally set up.  At this point we can call
-// pthreads routines, so we set those up.
-class InitThreadDisableCounter {
- public:
-  InitThreadDisableCounter() {
-    perftools_pthread_key_create(&thread_disable_counter_key, NULL);
-    // Set up the main thread's value, which we have a special variable for.
-    void* p = (void*)main_thread_counter;   // store the counter directly
-    perftools_pthread_setspecific(thread_disable_counter_key, p);
-    use_main_thread_counter = false;
-  }
-};
-InitThreadDisableCounter init_thread_disable_counter;
-
-#endif  // #ifdef HAVE_TLS
-
-HeapLeakChecker::Disabler::Disabler() {
-  // It is faster to unconditionally increment the thread-local
-  // counter than to check whether or not heap-checking is on
-  // in a thread-safe manner.
-  int counter = get_thread_disable_counter();
-  set_thread_disable_counter(counter + 1);
-  RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1);
-}
-
-HeapLeakChecker::Disabler::~Disabler() {
-  int counter = get_thread_disable_counter();
-  RAW_DCHECK(counter > 0, "");
-  if (counter > 0) {
-    set_thread_disable_counter(counter - 1);
-    RAW_VLOG(10, "Decreasing thread disable counter to %d", counter);
-  } else {
-    RAW_VLOG(0, "Thread disable counter underflow : %d", counter);
-  }
-}
-
-//----------------------------------------------------------------------
-
-// The size of the largest heap object allocated so far.
-static size_t max_heap_object_size = 0;
-// The possible range of addresses that can point
-// into one of the elements of heap_objects.
-static uintptr_t min_heap_address = uintptr_t(-1LL);
-static uintptr_t max_heap_address = 0;
-
-//----------------------------------------------------------------------
-
-// Simple casting helpers for uintptr_t and void*:
-template<typename T>
-inline static const void* AsPtr(T addr) {
-  return reinterpret_cast<void*>(addr);
-}
-inline static uintptr_t AsInt(const void* ptr) {
-  return reinterpret_cast<uintptr_t>(ptr);
-}
-
-//----------------------------------------------------------------------
-
-// We've seen reports that strstr causes heap-checker crashes in some
-// libc's (?):
-//    http://code.google.com/p/gperftools/issues/detail?id=263
-// It's simple enough to use our own.  This is not in time-critical code.
-static const char* hc_strstr(const char* s1, const char* s2) {
-  const size_t len = strlen(s2);
-  RAW_CHECK(len > 0, "Unexpected empty string passed to strstr()");
-  for (const char* p = strchr(s1, *s2); p != NULL; p = strchr(p+1, *s2)) {
-    if (strncmp(p, s2, len) == 0) {
-      return p;
-    }
-  }
-  return NULL;
-}
-
-//----------------------------------------------------------------------
-
-// Our hooks for MallocHook
-static void NewHook(const void* ptr, size_t size) {
-  if (ptr != NULL) {
-    const int counter = get_thread_disable_counter();
-    const bool ignore = (counter > 0);
-    RAW_VLOG(16, "Recording Alloc: %p of %" PRIuS "; %d", ptr, size,
-             int(counter));
-
-    // Fetch the caller's stack trace before acquiring heap_checker_lock.
-    void* stack[HeapProfileTable::kMaxStackDepth];
-    int depth = HeapProfileTable::GetCallerStackTrace(0, stack);
-
-    { SpinLockHolder l(&heap_checker_lock);
-      if (size > max_heap_object_size) max_heap_object_size = size;
-      uintptr_t addr = AsInt(ptr);
-      if (addr < min_heap_address) min_heap_address = addr;
-      addr += size;
-      if (addr > max_heap_address) max_heap_address = addr;
-      if (heap_checker_on) {
-        heap_profile->RecordAlloc(ptr, size, depth, stack);
-        if (ignore) {
-          heap_profile->MarkAsIgnored(ptr);
-        }
-      }
-    }
-    RAW_VLOG(17, "Alloc Recorded: %p of %" PRIuS "", ptr, size);
-  }
-}
-
-static void DeleteHook(const void* ptr) {
-  if (ptr != NULL) {
-    RAW_VLOG(16, "Recording Free %p", ptr);
-    { SpinLockHolder l(&heap_checker_lock);
-      if (heap_checker_on) heap_profile->RecordFree(ptr);
-    }
-    RAW_VLOG(17, "Free Recorded: %p", ptr);
-  }
-}
-
-//----------------------------------------------------------------------
-
-enum StackDirection {
-  GROWS_TOWARDS_HIGH_ADDRESSES,
-  GROWS_TOWARDS_LOW_ADDRESSES,
-  UNKNOWN_DIRECTION
-};
-
-// Determine which way the stack grows:
-
-static StackDirection ATTRIBUTE_NOINLINE GetStackDirection(
-    const uintptr_t *const ptr) {
-  uintptr_t x;
-  if (&x < ptr)
-    return GROWS_TOWARDS_LOW_ADDRESSES;
-  if (ptr < &x)
-    return GROWS_TOWARDS_HIGH_ADDRESSES;
-
-  RAW_CHECK(0, "");  // Couldn't determine the stack direction.
-
-  return UNKNOWN_DIRECTION;
-}
-
-// Direction of stack growth (will initialize via GetStackDirection())
-static StackDirection stack_direction = UNKNOWN_DIRECTION;
-
-// This routine is called for every thread stack we know about to register it.
-static void RegisterStackLocked(const void* top_ptr) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-  RAW_VLOG(10, "Thread stack at %p", top_ptr);
-  uintptr_t top = AsInt(top_ptr);
-  stack_tops->insert(top);  // add for later use
-
-  // make sure stack_direction is initialized
-  if (stack_direction == UNKNOWN_DIRECTION) {
-    stack_direction = GetStackDirection(&top);
-  }
-
-  // Find memory region with this stack
-  MemoryRegionMap::Region region;
-  if (MemoryRegionMap::FindAndMarkStackRegion(top, &region)) {
-    // Make the proper portion of the stack live:
-    if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
-      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                  top_ptr, region.end_addr - top);
-      live_objects->push_back(AllocObject(top_ptr, region.end_addr - top,
-                                          THREAD_DATA));
-    } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
-      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                  AsPtr(region.start_addr),
-                  top - region.start_addr);
-      live_objects->push_back(AllocObject(AsPtr(region.start_addr),
-                                          top - region.start_addr,
-                                          THREAD_DATA));
-    }
-  // not in MemoryRegionMap, look in library_live_objects:
-  } else if (FLAGS_heap_check_ignore_global_live) {
-    for (LibraryLiveObjectsStacks::iterator lib = library_live_objects->begin();
-         lib != library_live_objects->end(); ++lib) {
-      for (LiveObjectsStack::iterator span = lib->second.begin();
-           span != lib->second.end(); ++span) {
-        uintptr_t start = AsInt(span->ptr);
-        uintptr_t end = start + span->size;
-        if (start <= top  &&  top < end) {
-          RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p",
-                      top_ptr, AsPtr(start), AsPtr(end));
-          // Shrink start..end region by chopping away the memory regions in
-          // MemoryRegionMap that land in it to undo merging of regions
-          // in /proc/self/maps, so that we correctly identify what portion
-          // of start..end is actually the stack region.
-          uintptr_t stack_start = start;
-          uintptr_t stack_end = end;
-          // can optimize-away this loop, but it does not run often
-          RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-          for (MemoryRegionMap::RegionIterator r =
-                 MemoryRegionMap::BeginRegionLocked();
-               r != MemoryRegionMap::EndRegionLocked(); ++r) {
-            if (top < r->start_addr  &&  r->start_addr < stack_end) {
-              stack_end = r->start_addr;
-            }
-            if (stack_start < r->end_addr  &&  r->end_addr <= top) {
-              stack_start = r->end_addr;
-            }
-          }
-          if (stack_start != start  ||  stack_end != end) {
-            RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p",
-                        top_ptr, AsPtr(stack_start), AsPtr(stack_end));
-          }
-          // Make the proper portion of the stack live:
-          if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
-            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                        top_ptr, stack_end - top);
-            live_objects->push_back(
-              AllocObject(top_ptr, stack_end - top, THREAD_DATA));
-          } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
-            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
-                        AsPtr(stack_start), top - stack_start);
-            live_objects->push_back(
-              AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA));
-          }
-          lib->second.erase(span);  // kill the rest of the region
-          // Put the non-stack part(s) of the region back:
-          if (stack_start != start) {
-            lib->second.push_back(AllocObject(AsPtr(start), stack_start - start,
-                                  MAYBE_LIVE));
-          }
-          if (stack_end != end) {
-            lib->second.push_back(AllocObject(AsPtr(stack_end), end - stack_end,
-                                  MAYBE_LIVE));
-          }
-          return;
-        }
-      }
-    }
-    RAW_LOG(ERROR, "Memory region for stack at %p not found. "
-                   "Will likely report false leak positives.", top_ptr);
-  }
-}
-
-// Iterator for heap allocation map data to make ignored objects "live"
-// (i.e., treated as roots for the mark-and-sweep phase)
-static void MakeIgnoredObjectsLiveCallbackLocked(
-    const void* ptr, const HeapProfileTable::AllocInfo& info) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  if (info.ignored) {
-    live_objects->push_back(AllocObject(ptr, info.object_size,
-                                        MUST_BE_ON_HEAP));
-  }
-}
-
-// Iterator for heap allocation map data to make objects allocated from
-// disabled regions of code to be live.
-static void MakeDisabledLiveCallbackLocked(
-    const void* ptr, const HeapProfileTable::AllocInfo& info) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  bool stack_disable = false;
-  bool range_disable = false;
-  for (int depth = 0; depth < info.stack_depth; depth++) {
-    uintptr_t addr = AsInt(info.call_stack[depth]);
-    if (disabled_ranges) {
-      DisabledRangeMap::const_iterator iter
-        = disabled_ranges->upper_bound(addr);
-      if (iter != disabled_ranges->end()) {
-        RAW_DCHECK(iter->first > addr, "");
-        if (iter->second.start_address < addr  &&
-            iter->second.max_depth > depth) {
-          range_disable = true;  // in range; dropping
-          break;
-        }
-      }
-    }
-  }
-  if (stack_disable || range_disable) {
-    uintptr_t start_address = AsInt(ptr);
-    uintptr_t end_address = start_address + info.object_size;
-    StackTopSet::const_iterator iter
-      = stack_tops->lower_bound(start_address);
-    if (iter != stack_tops->end()) {
-      RAW_DCHECK(*iter >= start_address, "");
-      if (*iter < end_address) {
-        // We do not disable (treat as live) whole allocated regions
-        // if they are used to hold thread call stacks
-        // (i.e. when we find a stack inside).
-        // The reason is that we'll treat as live the currently used
-        // stack portions anyway (see RegisterStackLocked),
-        // and the rest of the region where the stack lives can well
-        // contain outdated stack variables which are not live anymore,
-        // hence should not be treated as such.
-        RAW_VLOG(11, "Not %s-disabling %" PRIuS " bytes at %p"
-                    ": have stack inside: %p",
-                    (stack_disable ? "stack" : "range"),
-                    info.object_size, ptr, AsPtr(*iter));
-        return;
-      }
-    }
-    RAW_VLOG(11, "%s-disabling %" PRIuS " bytes at %p",
-                (stack_disable ? "Stack" : "Range"), info.object_size, ptr);
-    live_objects->push_back(AllocObject(ptr, info.object_size,
-                                        MUST_BE_ON_HEAP));
-  }
-}
-
-static const char kUnnamedProcSelfMapEntry[] = "UNNAMED";
-
-// This function takes some fields from a /proc/self/maps line:
-//
-//   start_address  start address of a memory region.
-//   end_address    end address of a memory region
-//   permissions    rwx + private/shared bit
-//   filename       filename of the mapped file
-//
-// If the region is not writeable, then it cannot have any heap
-// pointers in it, otherwise we record it as a candidate live region
-// to get filtered later.
-static void RecordGlobalDataLocked(uintptr_t start_address,
-                                   uintptr_t end_address,
-                                   const char* permissions,
-                                   const char* filename) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  // Ignore non-writeable regions.
-  if (strchr(permissions, 'w') == NULL) return;
-  if (filename == NULL  ||  *filename == '\0') {
-    filename = kUnnamedProcSelfMapEntry;
-  }
-  RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
-              filename, start_address, end_address);
-  (*library_live_objects)[filename].
-    push_back(AllocObject(AsPtr(start_address),
-                          end_address - start_address,
-                          MAYBE_LIVE));
-}
-
-// See if 'library' from /proc/self/maps has base name 'library_base'
-// i.e. contains it and has '.' or '-' after it.
-static bool IsLibraryNamed(const char* library, const char* library_base) {
-  const char* p = hc_strstr(library, library_base);
-  size_t sz = strlen(library_base);
-  return p != NULL  &&  (p[sz] == '.'  ||  p[sz] == '-');
-}
-
-// static
-void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library,
-                                                 uintptr_t start_address,
-                                                 uintptr_t end_address) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  int depth = 0;
-  // TODO(maxim): maybe this should be extended to also use objdump
-  //              and pick the text portion of the library more precisely.
-  if (IsLibraryNamed(library, "/libpthread")  ||
-        // libpthread has a lot of small "system" leaks we don't care about.
-        // In particular it allocates memory to store data supplied via
-        // pthread_setspecific (which can be the only pointer to a heap object).
-      IsLibraryNamed(library, "/libdl")  ||
-        // library loaders leak some "system" heap that we don't care about
-      IsLibraryNamed(library, "/libcrypto")  ||
-        // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer
-        // (any library can be, of course, but this one often is because speed
-        // is so important for making crypto usable).  We ignore all its
-        // allocations because we can't see the call stacks.  We'd prefer
-        // to ignore allocations done in files/symbols that match
-        // "default_malloc_ex|default_realloc_ex"
-        // but that doesn't work when the end-result binary is stripped.
-      IsLibraryNamed(library, "/libjvm")  ||
-        // JVM has a lot of leaks we don't care about.
-      IsLibraryNamed(library, "/libzip")
-        // The JVM leaks java.util.zip.Inflater after loading classes.
-     ) {
-    depth = 1;  // only disable allocation calls directly from the library code
-  } else if (IsLibraryNamed(library, "/ld")
-               // library loader leaks some "system" heap
-               // (e.g. thread-local storage) that we don't care about
-            ) {
-    depth = 2;  // disable allocation calls directly from the library code
-                // and at depth 2 from it.
-    // We need depth 2 here solely because of a libc bug that
-    // forces us to jump through __memalign_hook and MemalignOverride hoops
-    // in tcmalloc.cc.
-    // Those buggy __libc_memalign() calls are in ld-linux.so and happen for
-    // thread-local storage allocations that we want to ignore here.
-    // We go with the depth-2 hack as a workaround for this libc bug:
-    // otherwise we'd need to extend MallocHook interface
-    // so that correct stack depth adjustment can be propagated from
-    // the exceptional case of MemalignOverride.
-    // Using depth 2 here should not mask real leaks because ld-linux.so
-    // does not call user code.
-  }
-  if (depth) {
-    RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth);
-    DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth);
-    if (IsLibraryNamed(library, "/libpthread")  ||
-        IsLibraryNamed(library, "/libdl")  ||
-        IsLibraryNamed(library, "/ld")) {
-      RAW_VLOG(10, "Global memory regions made by %s will be live data",
-                  library);
-      if (global_region_caller_ranges == NULL) {
-        global_region_caller_ranges =
-          new(Allocator::Allocate(sizeof(GlobalRegionCallerRangeMap)))
-            GlobalRegionCallerRangeMap;
-      }
-      global_region_caller_ranges
-        ->insert(make_pair(end_address, start_address));
-    }
-  }
-}
-
-// static
-HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked(
-                                  ProcMapsTask proc_maps_task) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  // Need to provide own scratch memory to ProcMapsIterator:
-  ProcMapsIterator::Buffer buffer;
-  ProcMapsIterator it(0, &buffer);
-  if (!it.Valid()) {
-    int errsv = errno;
-    RAW_LOG(ERROR, "Could not open /proc/self/maps: errno=%d. "
-                   "Libraries will not be handled correctly.", errsv);
-    return CANT_OPEN_PROC_MAPS;
-  }
-  uint64 start_address, end_address, file_offset;
-  int64 inode;
-  char *permissions, *filename;
-  bool saw_shared_lib = false;
-  bool saw_nonzero_inode = false;
-  bool saw_shared_lib_with_nonzero_inode = false;
-  while (it.Next(&start_address, &end_address, &permissions,
-                 &file_offset, &inode, &filename)) {
-    if (start_address >= end_address) {
-      // Warn if a line we can be interested in is ill-formed:
-      if (inode != 0) {
-        RAW_LOG(ERROR, "Errors reading /proc/self/maps. "
-                       "Some global memory regions will not "
-                       "be handled correctly.");
-      }
-      // Silently skip other ill-formed lines: some are possible
-      // probably due to the interplay of how /proc/self/maps is updated
-      // while we read it in chunks in ProcMapsIterator and
-      // do things in this loop.
-      continue;
-    }
-    // Determine if any shared libraries are present (this is the same
-    // list of extensions as is found in pprof).  We want to ignore
-    // 'fake' libraries with inode 0 when determining.  However, some
-    // systems don't share inodes via /proc, so we turn off this check
-    // if we don't see any evidence that we're getting inode info.
-    if (inode != 0) {
-      saw_nonzero_inode = true;
-    }
-    if ((hc_strstr(filename, "lib") && hc_strstr(filename, ".so")) ||
-        hc_strstr(filename, ".dll") ||
-        // not all .dylib filenames start with lib. .dylib is big enough
-        // that we are unlikely to get false matches just checking that.
-        hc_strstr(filename, ".dylib") || hc_strstr(filename, ".bundle")) {
-      saw_shared_lib = true;
-      if (inode != 0) {
-        saw_shared_lib_with_nonzero_inode = true;
-      }
-    }
-
-    switch (proc_maps_task) {
-      case DISABLE_LIBRARY_ALLOCS:
-        // All lines starting like
-        // "401dc000-4030f000 r??p 00132000 03:01 13991972  lib/bin"
-        // identify a data and code sections of a shared library or our binary
-        if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) {
-          DisableLibraryAllocsLocked(filename, start_address, end_address);
-        }
-        break;
-      case RECORD_GLOBAL_DATA:
-        RecordGlobalDataLocked(start_address, end_address,
-                               permissions, filename);
-        break;
-      default:
-        RAW_CHECK(0, "");
-    }
-  }
-  // If /proc/self/maps is reporting inodes properly (we saw a
-  // non-zero inode), then we only say we saw a shared lib if we saw a
-  // 'real' one, with a non-zero inode.
-  if (saw_nonzero_inode) {
-    saw_shared_lib = saw_shared_lib_with_nonzero_inode;
-  }
-  if (!saw_shared_lib) {
-    RAW_LOG(ERROR, "No shared libs detected. Will likely report false leak "
-                   "positives for statically linked executables.");
-    return NO_SHARED_LIBS_IN_PROC_MAPS;
-  }
-  return PROC_MAPS_USED;
-}
-
-// Total number and size of live objects dropped from the profile;
-// (re)initialized in IgnoreAllLiveObjectsLocked.
-static int64 live_objects_total;
-static int64 live_bytes_total;
-
-// pid of the thread that is doing the current leak check
-// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
-static pid_t self_thread_pid = 0;
-
-// Status of our thread listing callback execution
-// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
-static enum {
-  CALLBACK_NOT_STARTED,
-  CALLBACK_STARTED,
-  CALLBACK_COMPLETED,
-} thread_listing_status = CALLBACK_NOT_STARTED;
-
-// Ideally to avoid deadlocks this function should not result in any libc
-// or other function calls that might need to lock a mutex:
-// It is called when all threads of a process are stopped
-// at arbitrary points thus potentially holding those locks.
-//
-// In practice we are calling some simple i/o and sprintf-type library functions
-// for logging messages, but use only our own LowLevelAlloc::Arena allocator.
-//
-// This is known to be buggy: the library i/o function calls are able to cause
-// deadlocks when they request a lock that a stopped thread happens to hold.
-// This issue as far as we know have so far not resulted in any deadlocks
-// in practice, so for now we are taking our chance that the deadlocks
-// have insignificant frequency.
-//
-// If such deadlocks become a problem we should make the i/o calls
-// into appropriately direct system calls (or eliminate them),
-// in particular write() is not safe and vsnprintf() is potentially dangerous
-// due to reliance on locale functions (these are called through RAW_LOG
-// and in other ways).
-//
-
-#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER)
-# if (defined(__i386__) || defined(__x86_64))
-#  define THREAD_REGS i386_regs
-# elif defined(__PPC__)
-#  define THREAD_REGS ppc_regs
-# endif
-#endif
-
-/*static*/ int HeapLeakChecker::IgnoreLiveThreadsLocked(void* parameter,
-                                                        int num_threads,
-                                                        pid_t* thread_pids,
-                                                        va_list /*ap*/) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  thread_listing_status = CALLBACK_STARTED;
-  RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid());
-
-  if (FLAGS_heap_check_ignore_global_live) {
-    UseProcMapsLocked(RECORD_GLOBAL_DATA);
-  }
-
-  // We put the registers from other threads here
-  // to make pointers stored in them live.
-  vector<void*, STL_Allocator<void*, Allocator> > thread_registers;
-
-  int failures = 0;
-  for (int i = 0; i < num_threads; ++i) {
-    // the leak checking thread itself is handled
-    // specially via self_thread_stack, not here:
-    if (thread_pids[i] == self_thread_pid) continue;
-    RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]);
-#ifdef THREAD_REGS
-    THREAD_REGS thread_regs;
-#define sys_ptrace(r, p, a, d)  syscall(SYS_ptrace, (r), (p), (a), (d))
-    // We use sys_ptrace to avoid thread locking
-    // because this is called from TCMalloc_ListAllProcessThreads
-    // when all but this thread are suspended.
-    if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) {
-      // Need to use SP to get all the data from the very last stack frame:
-      COMPILE_ASSERT(sizeof(thread_regs.SP) == sizeof(void*),
-                     SP_register_does_not_look_like_a_pointer);
-      RegisterStackLocked(reinterpret_cast<void*>(thread_regs.SP));
-      // Make registers live (just in case PTRACE_ATTACH resulted in some
-      // register pointers still being in the registers and not on the stack):
-      for (void** p = reinterpret_cast<void**>(&thread_regs);
-           p < reinterpret_cast<void**>(&thread_regs + 1); ++p) {
-        RAW_VLOG(12, "Thread register %p", *p);
-        thread_registers.push_back(*p);
-      }
-    } else {
-      failures += 1;
-    }
-#else
-    failures += 1;
-#endif
-  }
-  // Use all the collected thread (stack) liveness sources:
-  IgnoreLiveObjectsLocked("threads stack data", "");
-  if (thread_registers.size()) {
-    // Make thread registers be live heap data sources.
-    // we rely here on the fact that vector is in one memory chunk:
-    RAW_VLOG(11, "Live registers at %p of %" PRIuS " bytes",
-                &thread_registers[0], thread_registers.size() * sizeof(void*));
-    live_objects->push_back(AllocObject(&thread_registers[0],
-                                        thread_registers.size() * sizeof(void*),
-                                        THREAD_REGISTERS));
-    IgnoreLiveObjectsLocked("threads register data", "");
-  }
-  // Do all other liveness walking while all threads are stopped:
-  IgnoreNonThreadLiveObjectsLocked();
-  // Can now resume the threads:
-  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
-  thread_listing_status = CALLBACK_COMPLETED;
-  return failures;
-}
-
-// Stack top of the thread that is doing the current leak check
-// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
-static const void* self_thread_stack_top;
-
-// static
-void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-  RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid);
-  // Register our own stack:
-
-  // Important that all stack ranges (including the one here)
-  // are known before we start looking at them
-  // in MakeDisabledLiveCallbackLocked:
-  RegisterStackLocked(self_thread_stack_top);
-  IgnoreLiveObjectsLocked("stack data", "");
-
-  // Make objects we were told to ignore live:
-  if (ignored_objects) {
-    for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
-         object != ignored_objects->end(); ++object) {
-      const void* ptr = AsPtr(object->first);
-      RAW_VLOG(11, "Ignored live object at %p of %" PRIuS " bytes",
-                  ptr, object->second);
-      live_objects->
-        push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP));
-      // we do this liveness check for ignored_objects before doing any
-      // live heap walking to make sure it does not fail needlessly:
-      size_t object_size;
-      if (!(heap_profile->FindAlloc(ptr, &object_size)  &&
-            object->second == object_size)) {
-        RAW_LOG(FATAL, "Object at %p of %" PRIuS " bytes from an"
-                       " IgnoreObject() has disappeared", ptr, object->second);
-      }
-    }
-    IgnoreLiveObjectsLocked("ignored objects", "");
-  }
-
-  // Treat objects that were allocated when a Disabler was live as
-  // roots.  I.e., if X was allocated while a Disabler was active,
-  // and Y is reachable from X, arrange that neither X nor Y are
-  // treated as leaks.
-  heap_profile->IterateAllocs(MakeIgnoredObjectsLiveCallbackLocked);
-  IgnoreLiveObjectsLocked("disabled objects", "");
-
-  // Make code-address-disabled objects live and ignored:
-  // This in particular makes all thread-specific data live
-  // because the basic data structure to hold pointers to thread-specific data
-  // is allocated from libpthreads and we have range-disabled that
-  // library code with UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
-  // so now we declare all thread-specific data reachable from there as live.
-  heap_profile->IterateAllocs(MakeDisabledLiveCallbackLocked);
-  IgnoreLiveObjectsLocked("disabled code", "");
-
-  // Actually make global data live:
-  if (FLAGS_heap_check_ignore_global_live) {
-    bool have_null_region_callers = false;
-    for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin();
-         l != library_live_objects->end(); ++l) {
-      RAW_CHECK(live_objects->empty(), "");
-      // Process library_live_objects in l->second
-      // filtering them by MemoryRegionMap:
-      // It's safe to iterate over MemoryRegionMap
-      // w/o locks here as we are inside MemoryRegionMap::Lock():
-      RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
-      // The only change to MemoryRegionMap possible in this loop
-      // is region addition as a result of allocating more memory
-      // for live_objects. This won't invalidate the RegionIterator
-      // or the intent of the loop.
-      // --see the comment by MemoryRegionMap::BeginRegionLocked().
-      for (MemoryRegionMap::RegionIterator region =
-             MemoryRegionMap::BeginRegionLocked();
-           region != MemoryRegionMap::EndRegionLocked(); ++region) {
-        // "region" from MemoryRegionMap is to be subtracted from
-        // (tentatively live) regions in l->second
-        // if it has a stack inside or it was allocated by
-        // a non-special caller (not one covered by a range
-        // in global_region_caller_ranges).
-        // This will in particular exclude all memory chunks used
-        // by the heap itself as well as what's been allocated with
-        // any allocator on top of mmap.
-        bool subtract = true;
-        if (!region->is_stack  &&  global_region_caller_ranges) {
-          if (region->caller() == static_cast<uintptr_t>(NULL)) {
-            have_null_region_callers = true;
-          } else {
-            GlobalRegionCallerRangeMap::const_iterator iter
-              = global_region_caller_ranges->upper_bound(region->caller());
-            if (iter != global_region_caller_ranges->end()) {
-              RAW_DCHECK(iter->first > region->caller(), "");
-              if (iter->second < region->caller()) {  // in special region
-                subtract = false;
-              }
-            }
-          }
-        }
-        if (subtract) {
-          // The loop puts the result of filtering l->second into live_objects:
-          for (LiveObjectsStack::const_iterator i = l->second.begin();
-               i != l->second.end(); ++i) {
-            // subtract *region from *i
-            uintptr_t start = AsInt(i->ptr);
-            uintptr_t end = start + i->size;
-            if (region->start_addr <= start  &&  end <= region->end_addr) {
-              // full deletion due to subsumption
-            } else if (start < region->start_addr  &&
-                       region->end_addr < end) {  // cutting-out split
-              live_objects->push_back(AllocObject(i->ptr,
-                                                  region->start_addr - start,
-                                                  IN_GLOBAL_DATA));
-              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
-                                                  end - region->end_addr,
-                                                  IN_GLOBAL_DATA));
-            } else if (region->end_addr > start  &&
-                       region->start_addr <= start) {  // cut from start
-              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
-                                                  end - region->end_addr,
-                                                  IN_GLOBAL_DATA));
-            } else if (region->start_addr > start  &&
-                       region->start_addr < end) {  // cut from end
-              live_objects->push_back(AllocObject(i->ptr,
-                                                  region->start_addr - start,
-                                                  IN_GLOBAL_DATA));
-            } else {  // pass: no intersection
-              live_objects->push_back(AllocObject(i->ptr, i->size,
-                                                  IN_GLOBAL_DATA));
-            }
-          }
-          // Move live_objects back into l->second
-          // for filtering by the next region.
-          live_objects->swap(l->second);
-          live_objects->clear();
-        }
-      }
-      // Now get and use live_objects from the final version of l->second:
-      if (VLOG_IS_ON(11)) {
-        for (LiveObjectsStack::const_iterator i = l->second.begin();
-             i != l->second.end(); ++i) {
-          RAW_VLOG(11, "Library live region at %p of %" PRIuPTR " bytes",
-                      i->ptr, i->size);
-        }
-      }
-      live_objects->swap(l->second);
-      IgnoreLiveObjectsLocked("in globals of\n  ", l->first.c_str());
-    }
-    if (have_null_region_callers) {
-      RAW_LOG(ERROR, "Have memory regions w/o callers: "
-                     "might report false leaks");
-    }
-    Allocator::DeleteAndNull(&library_live_objects);
-  }
-}
-
-// Callback for TCMalloc_ListAllProcessThreads in IgnoreAllLiveObjectsLocked below
-// to test/verify that we have just the one main thread, in which case
-// we can do everything in that main thread,
-// so that CPU profiler can collect all its samples.
-// Returns the number of threads in the process.
-static int IsOneThread(void* parameter, int num_threads,
-                       pid_t* thread_pids, va_list ap) {
-  if (num_threads != 1) {
-    RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of leak "
-                     "checking work happening in IgnoreLiveThreadsLocked!");
-  }
-  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
-  return num_threads;
-}
-
-// Dummy for IgnoreAllLiveObjectsLocked below.
-// Making it global helps with compiler warnings.
-static va_list dummy_ap;
-
-// static
-void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_CHECK(live_objects == NULL, "");
-  live_objects = new(Allocator::Allocate(sizeof(LiveObjectsStack)))
-                   LiveObjectsStack;
-  stack_tops = new(Allocator::Allocate(sizeof(StackTopSet))) StackTopSet;
-  // reset the counts
-  live_objects_total = 0;
-  live_bytes_total = 0;
-  // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset
-  // for the time of leak check.
-  // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size
-  // to manage reasonably low chances of random bytes
-  // appearing to be pointing into large actually leaked heap objects.
-  const size_t old_max_heap_object_size = max_heap_object_size;
-  max_heap_object_size = (
-    FLAGS_heap_check_max_pointer_offset != -1
-    ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size)
-    : max_heap_object_size);
-  // Record global data as live:
-  if (FLAGS_heap_check_ignore_global_live) {
-    library_live_objects =
-      new(Allocator::Allocate(sizeof(LibraryLiveObjectsStacks)))
-        LibraryLiveObjectsStacks;
-  }
-  // Ignore all thread stacks:
-  thread_listing_status = CALLBACK_NOT_STARTED;
-  bool need_to_ignore_non_thread_objects = true;
-  self_thread_pid = getpid();
-  self_thread_stack_top = self_stack_top;
-  if (FLAGS_heap_check_ignore_thread_live) {
-    // In case we are doing CPU profiling we'd like to do all the work
-    // in the main thread, not in the special thread created by
-    // TCMalloc_ListAllProcessThreads, so that CPU profiler can
-    // collect all its samples.  The machinery of
-    // TCMalloc_ListAllProcessThreads conflicts with the CPU profiler
-    // by also relying on signals and ::sigaction.  We can do this
-    // (run everything in the main thread) safely only if there's just
-    // the main thread itself in our process.  This variable reflects
-    // these two conditions:
-    bool want_and_can_run_in_main_thread =
-      ProfilingIsEnabledForAllThreads()  &&
-      TCMalloc_ListAllProcessThreads(NULL, IsOneThread) == 1;
-    // When the normal path of TCMalloc_ListAllProcessThreads below is taken,
-    // we fully suspend the threads right here before any liveness checking
-    // and keep them suspended for the whole time of liveness checking
-    // inside of the IgnoreLiveThreadsLocked callback.
-    // (The threads can't (de)allocate due to lock on the delete hook but
-    //  if not suspended they could still mess with the pointer
-    //  graph while we walk it).
-    int r = want_and_can_run_in_main_thread
-            ? IgnoreLiveThreadsLocked(NULL, 1, &self_thread_pid, dummy_ap)
-            : TCMalloc_ListAllProcessThreads(NULL, IgnoreLiveThreadsLocked);
-    need_to_ignore_non_thread_objects = r < 0;
-    if (r < 0) {
-      RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno);
-      if (thread_listing_status == CALLBACK_COMPLETED) {
-        RAW_LOG(INFO, "Thread finding callback "
-                      "finished ok; hopefully everything is fine");
-        need_to_ignore_non_thread_objects = false;
-      } else if (thread_listing_status == CALLBACK_STARTED) {
-        RAW_LOG(FATAL, "Thread finding callback was "
-                       "interrupted or crashed; can't fix this");
-      } else {  // CALLBACK_NOT_STARTED
-        RAW_LOG(ERROR, "Could not find thread stacks. "
-                       "Will likely report false leak positives.");
-      }
-    } else if (r != 0) {
-      RAW_LOG(ERROR, "Thread stacks not found for %d threads. "
-                     "Will likely report false leak positives.", r);
-    } else {
-      RAW_VLOG(11, "Thread stacks appear to be found for all threads");
-    }
-  } else {
-    RAW_LOG(WARNING, "Not looking for thread stacks; "
-                     "objects reachable only from there "
-                     "will be reported as leaks");
-  }
-  // Do all other live data ignoring here if we did not do it
-  // within thread listing callback with all threads stopped.
-  if (need_to_ignore_non_thread_objects) {
-    if (FLAGS_heap_check_ignore_global_live) {
-      UseProcMapsLocked(RECORD_GLOBAL_DATA);
-    }
-    IgnoreNonThreadLiveObjectsLocked();
-  }
-  if (live_objects_total) {
-    RAW_VLOG(10, "Ignoring %" PRId64 " reachable objects of %" PRId64 " bytes",
-                live_objects_total, live_bytes_total);
-  }
-  // Free these: we made them here and heap_profile never saw them
-  Allocator::DeleteAndNull(&live_objects);
-  Allocator::DeleteAndNull(&stack_tops);
-  max_heap_object_size = old_max_heap_object_size;  // reset this var
-}
-
-// Alignment at which we should consider pointer positions
-// in IgnoreLiveObjectsLocked. Will normally use the value of
-// FLAGS_heap_check_pointer_source_alignment.
-static size_t pointer_source_alignment = kPointerSourceAlignment;
-// Global lock for HeapLeakChecker::DoNoLeaks
-// to protect pointer_source_alignment.
-static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
-
-// This function changes the live bits in the heap_profile-table's state:
-// we only record the live objects to be skipped.
-//
-// When checking if a byte sequence points to a heap object we use
-// HeapProfileTable::FindInsideAlloc to handle both pointers to
-// the start and inside of heap-allocated objects.
-// The "inside" case needs to be checked to support
-// at least the following relatively common cases:
-// - C++ arrays allocated with new FooClass[size] for classes
-//   with destructors have their size recorded in a sizeof(int) field
-//   before the place normal pointers point to.
-// - basic_string<>-s for e.g. the C++ library of gcc 3.4
-//   have the meta-info in basic_string<...>::_Rep recorded
-//   before the place normal pointers point to.
-// - Multiple-inherited objects have their pointers when cast to
-//   different base classes pointing inside of the actually
-//   allocated object.
-// - Sometimes reachability pointers point to member objects of heap objects,
-//   and then those member objects point to the full heap object.
-// - Third party UnicodeString: it stores a 32-bit refcount
-//   (in both 32-bit and 64-bit binaries) as the first uint32
-//   in the allocated memory and a normal pointer points at
-//   the second uint32 behind the refcount.
-// By finding these additional objects here
-// we slightly increase the chance to mistake random memory bytes
-// for a pointer and miss a leak in a particular run of a binary.
-//
-/*static*/ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
-                                                         const char* name2) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  int64 live_object_count = 0;
-  int64 live_byte_count = 0;
-  while (!live_objects->empty()) {
-    const char* object =
-      reinterpret_cast<const char*>(live_objects->back().ptr);
-    size_t size = live_objects->back().size;
-    const ObjectPlacement place = live_objects->back().place;
-    live_objects->pop_back();
-    if (place == MUST_BE_ON_HEAP  &&  heap_profile->MarkAsLive(object)) {
-      live_object_count += 1;
-      live_byte_count += size;
-    }
-    RAW_VLOG(13, "Looking for heap pointers in %p of %" PRIuS " bytes",
-                object, size);
-    const char* const whole_object = object;
-    size_t const whole_size = size;
-    // Try interpretting any byte sequence in object,size as a heap pointer:
-    const size_t remainder = AsInt(object) % pointer_source_alignment;
-    if (remainder) {
-      object += pointer_source_alignment - remainder;
-      if (size >= pointer_source_alignment - remainder) {
-        size -= pointer_source_alignment - remainder;
-      } else {
-        size = 0;
-      }
-    }
-    if (size < sizeof(void*)) continue;
-
-#ifdef NO_FRAME_POINTER
-    // Frame pointer omission requires us to use libunwind, which uses direct
-    // mmap and munmap system calls, and that needs special handling.
-    if (name2 == kUnnamedProcSelfMapEntry) {
-      static const uintptr_t page_mask = ~(getpagesize() - 1);
-      const uintptr_t addr = reinterpret_cast<uintptr_t>(object);
-      if ((addr & page_mask) == 0 && (size & page_mask) == 0) {
-        // This is an object we slurped from /proc/self/maps.
-        // It may or may not be readable at this point.
-        //
-        // In case all the above conditions made a mistake, and the object is
-        // not related to libunwind, we also verify that it's not readable
-        // before ignoring it.
-        if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) {
-          // Skip unreadable object, so we don't crash trying to sweep it.
-          RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) "
-                   "(msync error %d (%s))",
-                   object, object + size, errno, strerror(errno));
-          continue;
-        }
-      }
-    }
-#endif
-
-    const char* const max_object = object + size - sizeof(void*);
-    while (object <= max_object) {
-      // potentially unaligned load:
-      const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object);
-      // Do fast check before the more expensive HaveOnHeapLocked lookup:
-      // this code runs for all memory words that are potentially pointers:
-      const bool can_be_on_heap =
-        // Order tests by the likelyhood of the test failing in 64/32 bit modes.
-        // Yes, this matters: we either lose 5..6% speed in 32 bit mode
-        // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode.
-        // After the alignment test got dropped the above performance figures
-        // must have changed; might need to revisit this.
-#if defined(__x86_64__)
-        addr <= max_heap_address  &&  // <= is for 0-sized object with max addr
-        min_heap_address <= addr;
-#else
-        min_heap_address <= addr  &&
-        addr <= max_heap_address;  // <= is for 0-sized object with max addr
-#endif
-      if (can_be_on_heap) {
-        const void* ptr = reinterpret_cast<const void*>(addr);
-        // Too expensive (inner loop): manually uncomment when debugging:
-        // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object);
-        size_t object_size;
-        if (HaveOnHeapLocked(&ptr, &object_size)  &&
-            heap_profile->MarkAsLive(ptr)) {
-          // We take the (hopefully low) risk here of encountering by accident
-          // a byte sequence in memory that matches an address of
-          // a heap object which is in fact leaked.
-          // I.e. in very rare and probably not repeatable/lasting cases
-          // we might miss some real heap memory leaks.
-          RAW_VLOG(14, "Found pointer to %p of %" PRIuS " bytes at %p "
-                      "inside %p of size %" PRIuS "",
-                      ptr, object_size, object, whole_object, whole_size);
-          if (VLOG_IS_ON(15)) {
-            // log call stacks to help debug how come something is not a leak
-            HeapProfileTable::AllocInfo alloc;
-            if (!heap_profile->FindAllocDetails(ptr, &alloc)) {
-              RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr);
-            }
-            RAW_LOG(INFO, "New live %p object's alloc stack:", ptr);
-            for (int i = 0; i < alloc.stack_depth; ++i) {
-              RAW_LOG(INFO, "  @ %p", alloc.call_stack[i]);
-            }
-          }
-          live_object_count += 1;
-          live_byte_count += object_size;
-          live_objects->push_back(AllocObject(ptr, object_size,
-                                              IGNORED_ON_HEAP));
-        }
-      }
-      object += pointer_source_alignment;
-    }
-  }
-  live_objects_total += live_object_count;
-  live_bytes_total += live_byte_count;
-  if (live_object_count) {
-    RAW_VLOG(10, "Removed %" PRId64 " live heap objects of %" PRId64 " bytes: %s%s",
-                live_object_count, live_byte_count, name, name2);
-  }
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker leak check disabling components
-//----------------------------------------------------------------------
-
-// static
-void HeapLeakChecker::DisableChecksIn(const char* pattern) {
-  RAW_LOG(WARNING, "DisableChecksIn(%s) is ignored", pattern);
-}
-
-// static
-void HeapLeakChecker::DoIgnoreObject(const void* ptr) {
-  SpinLockHolder l(&heap_checker_lock);
-  if (!heap_checker_on) return;
-  size_t object_size;
-  if (!HaveOnHeapLocked(&ptr, &object_size)) {
-    RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr);
-  } else {
-    RAW_VLOG(10, "Going to ignore live object at %p of %" PRIuS " bytes",
-                ptr, object_size);
-    if (ignored_objects == NULL)  {
-      ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap)))
-                          IgnoredObjectsMap;
-    }
-    if (!ignored_objects->insert(make_pair(AsInt(ptr), object_size)).second) {
-      RAW_LOG(WARNING, "Object at %p is already being ignored", ptr);
-    }
-  }
-}
-
-// static
-void HeapLeakChecker::UnIgnoreObject(const void* ptr) {
-  SpinLockHolder l(&heap_checker_lock);
-  if (!heap_checker_on) return;
-  size_t object_size;
-  if (!HaveOnHeapLocked(&ptr, &object_size)) {
-    RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr);
-  } else {
-    bool found = false;
-    if (ignored_objects) {
-      IgnoredObjectsMap::iterator object = ignored_objects->find(AsInt(ptr));
-      if (object != ignored_objects->end()  &&  object_size == object->second) {
-        ignored_objects->erase(object);
-        found = true;
-        RAW_VLOG(10, "Now not going to ignore live object "
-                    "at %p of %" PRIuS " bytes", ptr, object_size);
-      }
-    }
-    if (!found)  RAW_LOG(FATAL, "Object at %p has not been ignored", ptr);
-  }
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker non-static functions
-//----------------------------------------------------------------------
-
-char* HeapLeakChecker::MakeProfileNameLocked() {
-  RAW_DCHECK(lock_->IsHeld(), "");
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  const int len = profile_name_prefix->size() + strlen(name_) + 5 +
-                  strlen(HeapProfileTable::kFileExt) + 1;
-  char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len));
-  snprintf(file_name, len, "%s.%s-end%s",
-           profile_name_prefix->c_str(), name_,
-           HeapProfileTable::kFileExt);
-  return file_name;
-}
-
-void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) {
-  SpinLockHolder l(lock_);
-  name_ = NULL;  // checker is inactive
-  start_snapshot_ = NULL;
-  has_checked_ = false;
-  inuse_bytes_increase_ = 0;
-  inuse_allocs_increase_ = 0;
-  keep_profiles_ = false;
-  char* n = new char[strlen(name) + 1];   // do this before we lock
-  IgnoreObject(n);  // otherwise it might be treated as live due to our stack
-  { // Heap activity in other threads is paused for this whole scope.
-    SpinLockHolder al(&alignment_checker_lock);
-    SpinLockHolder hl(&heap_checker_lock);
-    MemoryRegionMap::LockHolder ml;
-    if (heap_checker_on  &&  profile_name_prefix != NULL) {
-      RAW_DCHECK(strchr(name, '/') == NULL, "must be a simple name");
-      memcpy(n, name, strlen(name) + 1);
-      name_ = n;  // checker is active
-      if (make_start_snapshot) {
-        start_snapshot_ = heap_profile->TakeSnapshot();
-      }
-
-      const HeapProfileTable::Stats& t = heap_profile->total();
-      const size_t start_inuse_bytes = t.alloc_size - t.free_size;
-      const size_t start_inuse_allocs = t.allocs - t.frees;
-      RAW_VLOG(10, "Start check \"%s\" profile: %" PRIuS " bytes "
-               "in %" PRIuS " objects",
-               name_, start_inuse_bytes, start_inuse_allocs);
-    } else {
-      RAW_LOG(WARNING, "Heap checker is not active, "
-                       "hence checker \"%s\" will do nothing!", name);
-    RAW_LOG(WARNING, "To activate set the HEAPCHECK environment variable.\n");
-    }
-  }
-  if (name_ == NULL) {
-    UnIgnoreObject(n);
-    delete[] n;  // must be done after we unlock
-  }
-}
-
-HeapLeakChecker::HeapLeakChecker(const char *name) : lock_(new SpinLock) {
-  RAW_DCHECK(strcmp(name, "_main_") != 0, "_main_ is reserved");
-  Create(name, true/*create start_snapshot_*/);
-}
-
-HeapLeakChecker::HeapLeakChecker() : lock_(new SpinLock) {
-  if (FLAGS_heap_check_before_constructors) {
-    // We want to check for leaks of objects allocated during global
-    // constructors (i.e., objects allocated already).  So we do not
-    // create a baseline snapshot and hence check for leaks of objects
-    // that may have already been created.
-    Create("_main_", false);
-  } else {
-    // We want to ignore leaks of objects allocated during global
-    // constructors (i.e., objects allocated already).  So we snapshot
-    // the current heap contents and use them as a baseline that is
-    // not reported by the leak checker.
-    Create("_main_", true);
-  }
-}
-
-ssize_t HeapLeakChecker::BytesLeaked() const {
-  SpinLockHolder l(lock_);
-  if (!has_checked_) {
-    RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call");
-  }
-  return inuse_bytes_increase_;
-}
-
-ssize_t HeapLeakChecker::ObjectsLeaked() const {
-  SpinLockHolder l(lock_);
-  if (!has_checked_) {
-    RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call");
-  }
-  return inuse_allocs_increase_;
-}
-
-// Save pid of main thread for using in naming dump files
-static int32 main_thread_pid = getpid();
-#ifdef HAVE_PROGRAM_INVOCATION_NAME
-#ifdef __UCLIBC__
-extern const char* program_invocation_name;
-extern const char* program_invocation_short_name;
-#else
-extern char* program_invocation_name;
-extern char* program_invocation_short_name;
-#endif
-static const char* invocation_name() { return program_invocation_short_name; }
-static string invocation_path() { return program_invocation_name; }
-#else
-static const char* invocation_name() { return "<your binary>"; }
-static string invocation_path() { return "<your binary>"; }
-#endif
-
-// Prints commands that users can run to get more information
-// about the reported leaks.
-static void SuggestPprofCommand(const char* pprof_file_arg) {
-  // Extra help information to print for the user when the test is
-  // being run in a way where the straightforward pprof command will
-  // not suffice.
-  string extra_help;
-
-  // Common header info to print for remote runs
-  const string remote_header =
-      "This program is being executed remotely and therefore the pprof\n"
-      "command printed above will not work.  Either run this program\n"
-      "locally, or adjust the pprof command as follows to allow it to\n"
-      "work on your local machine:\n";
-
-  // Extra command for fetching remote data
-  string fetch_cmd;
-
-  RAW_LOG(WARNING,
-          "\n\n"
-          "If the preceding stack traces are not enough to find "
-          "the leaks, try running THIS shell command:\n\n"
-          "%s%s %s \"%s\" --inuse_objects --lines --heapcheck "
-          " --edgefraction=1e-10 --nodefraction=1e-10 --gv\n"
-          "\n"
-          "%s"
-          "If you are still puzzled about why the leaks are "
-          "there, try rerunning this program with "
-          "HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with "
-          "HEAP_CHECK_MAX_POINTER_OFFSET=-1\n"
-          "If the leak report occurs in a small fraction of runs, "
-          "try running with TCMALLOC_MAX_FREE_QUEUE_SIZE of few hundred MB "
-          "or with TCMALLOC_RECLAIM_MEMORY=false, "  // only works for debugalloc
-          "it might help find leaks more repeatably\n",
-          fetch_cmd.c_str(),
-          "pprof",           // works as long as pprof is on your path
-          invocation_path().c_str(),
-          pprof_file_arg,
-          extra_help.c_str()
-          );
-}
-
-bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
-  SpinLockHolder l(lock_);
-  // The locking also helps us keep the messages
-  // for the two checks close together.
-  SpinLockHolder al(&alignment_checker_lock);
-
-  // thread-safe: protected by alignment_checker_lock
-  static bool have_disabled_hooks_for_symbolize = false;
-  // Once we've checked for leaks and symbolized the results once, it's
-  // not safe to do it again.  This is because in order to symbolize
-  // safely, we had to disable all the malloc hooks here, so we no
-  // longer can be confident we've collected all the data we need.
-  if (have_disabled_hooks_for_symbolize) {
-    RAW_LOG(FATAL, "Must not call heap leak checker manually after "
-            " program-exit's automatic check.");
-  }
-
-  HeapProfileTable::Snapshot* leaks = NULL;
-  char* pprof_file = NULL;
-
-  {
-    // Heap activity in other threads is paused during this function
-    // (i.e. until we got all profile difference info).
-    SpinLockHolder hl(&heap_checker_lock);
-    if (heap_checker_on == false) {
-      if (name_ != NULL) {  // leak checking enabled when created the checker
-        RAW_LOG(WARNING, "Heap leak checker got turned off after checker "
-                "\"%s\" has been created, no leak check is being done for it!",
-                name_);
-      }
-      return true;
-    }
-
-    // Update global_region_caller_ranges. They may need to change since
-    // e.g. initialization because shared libraries might have been loaded or
-    // unloaded.
-    Allocator::DeleteAndNullIfNot(&global_region_caller_ranges);
-    ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
-    RAW_CHECK(pm_result == PROC_MAPS_USED, "");
-
-    // Keep track of number of internally allocated objects so we
-    // can detect leaks in the heap-leak-checket itself
-    const int initial_allocs = Allocator::alloc_count();
-
-    if (name_ == NULL) {
-      RAW_LOG(FATAL, "Heap leak checker must not be turned on "
-              "after construction of a HeapLeakChecker");
-    }
-
-    MemoryRegionMap::LockHolder ml;
-    int a_local_var;  // Use our stack ptr to make stack data live:
-
-    // Make the heap profile, other threads are locked out.
-    HeapProfileTable::Snapshot* base =
-        reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_);
-    RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
-    pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
-    IgnoreAllLiveObjectsLocked(&a_local_var);
-    leaks = heap_profile->NonLiveSnapshot(base);
-
-    inuse_bytes_increase_ = static_cast<ssize_t>(leaks->total().alloc_size);
-    inuse_allocs_increase_ = static_cast<ssize_t>(leaks->total().allocs);
-    if (leaks->Empty()) {
-      heap_profile->ReleaseSnapshot(leaks);
-      leaks = NULL;
-
-      // We can only check for internal leaks along the no-user-leak
-      // path since in the leak path we temporarily release
-      // heap_checker_lock and another thread can come in and disturb
-      // allocation counts.
-      if (Allocator::alloc_count() != initial_allocs) {
-        RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects ; %d -> %d",
-                Allocator::alloc_count() - initial_allocs,
-                initial_allocs, Allocator::alloc_count());
-      }
-    } else if (FLAGS_heap_check_test_pointer_alignment) {
-      if (pointer_source_alignment == 1) {
-        RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: "
-                "--heap_check_pointer_source_alignment was already set to 1");
-      } else {
-        // Try with reduced pointer aligment
-        pointer_source_alignment = 1;
-        IgnoreAllLiveObjectsLocked(&a_local_var);
-        HeapProfileTable::Snapshot* leaks_wo_align =
-            heap_profile->NonLiveSnapshot(base);
-        pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
-        if (leaks_wo_align->Empty()) {
-          RAW_LOG(WARNING, "Found no leaks without pointer alignment: "
-                  "something might be placing pointers at "
-                  "unaligned addresses! This needs to be fixed.");
-        } else {
-          RAW_LOG(INFO, "Found leaks without pointer alignment as well: "
-                  "unaligned pointers must not be the cause of leaks.");
-          RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help "
-                  "to diagnose the leaks.");
-        }
-        heap_profile->ReleaseSnapshot(leaks_wo_align);
-      }
-    }
-
-    if (leaks != NULL) {
-      pprof_file = MakeProfileNameLocked();
-    }
-  }
-
-  has_checked_ = true;
-  if (leaks == NULL) {
-    if (FLAGS_heap_check_max_pointer_offset == -1) {
-      RAW_LOG(WARNING,
-              "Found no leaks without max_pointer_offset restriction: "
-              "it's possible that the default value of "
-              "heap_check_max_pointer_offset flag is too low. "
-              "Do you use pointers with larger than that offsets "
-              "pointing in the middle of heap-allocated objects?");
-    }
-    const HeapProfileTable::Stats& stats = heap_profile->total();
-    RAW_VLOG(heap_checker_info_level,
-             "No leaks found for check \"%s\" "
-             "(but no 100%% guarantee that there aren't any): "
-             "found %" PRId64 " reachable heap objects of %" PRId64 " bytes",
-             name_,
-             int64(stats.allocs - stats.frees),
-             int64(stats.alloc_size - stats.free_size));
-  } else {
-    if (should_symbolize == SYMBOLIZE) {
-      // To turn addresses into symbols, we need to fork, which is a
-      // problem if both parent and child end up trying to call the
-      // same malloc-hooks we've set up, at the same time.  To avoid
-      // trouble, we turn off the hooks before symbolizing.  Note that
-      // this makes it unsafe to ever leak-report again!  Luckily, we
-      // typically only want to report once in a program's run, at the
-      // very end.
-      if (MallocHook::GetNewHook() == NewHook)
-        MallocHook::SetNewHook(NULL);
-      if (MallocHook::GetDeleteHook() == DeleteHook)
-        MallocHook::SetDeleteHook(NULL);
-      MemoryRegionMap::Shutdown();
-      // Make sure all the hooks really got unset:
-      RAW_CHECK(MallocHook::GetNewHook() == NULL, "");
-      RAW_CHECK(MallocHook::GetDeleteHook() == NULL, "");
-      RAW_CHECK(MallocHook::GetMmapHook() == NULL, "");
-      RAW_CHECK(MallocHook::GetSbrkHook() == NULL, "");
-      have_disabled_hooks_for_symbolize = true;
-      leaks->ReportLeaks(name_, pprof_file, true);  // true = should_symbolize
-    } else {
-      leaks->ReportLeaks(name_, pprof_file, false);
-    }
-    if (FLAGS_heap_check_identify_leaks) {
-      leaks->ReportIndividualObjects();
-    }
-
-    SuggestPprofCommand(pprof_file);
-
-    {
-      SpinLockHolder hl(&heap_checker_lock);
-      heap_profile->ReleaseSnapshot(leaks);
-      Allocator::Free(pprof_file);
-    }
-  }
-
-  return (leaks == NULL);
-}
-
-HeapLeakChecker::~HeapLeakChecker() {
-  if (name_ != NULL) {  // had leak checking enabled when created the checker
-    if (!has_checked_) {
-      RAW_LOG(FATAL, "Some *NoLeaks|SameHeap method"
-                     " must be called on any created HeapLeakChecker");
-    }
-
-    // Deallocate any snapshot taken at start
-    if (start_snapshot_ != NULL) {
-      SpinLockHolder l(&heap_checker_lock);
-      heap_profile->ReleaseSnapshot(
-          reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_));
-    }
-
-    UnIgnoreObject(name_);
-    delete[] name_;
-    name_ = NULL;
-  }
-  delete lock_;
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker overall heap check components
-//----------------------------------------------------------------------
-
-// static
-bool HeapLeakChecker::IsActive() {
-  SpinLockHolder l(&heap_checker_lock);
-  return heap_checker_on;
-}
-
-vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL;
-
-// When a HeapCleaner object is intialized, add its function to the static list
-// of cleaners to be run before leaks checking.
-HeapCleaner::HeapCleaner(void_function f) {
-  if (heap_cleanups_ == NULL)
-    heap_cleanups_ = new vector<HeapCleaner::void_function>;
-  heap_cleanups_->push_back(f);
-}
-
-// Run all of the cleanup functions and delete the vector.
-void HeapCleaner::RunHeapCleanups() {
-  if (!heap_cleanups_)
-    return;
-  for (int i = 0; i < heap_cleanups_->size(); i++) {
-    void (*f)(void) = (*heap_cleanups_)[i];
-    f();
-  }
-  delete heap_cleanups_;
-  heap_cleanups_ = NULL;
-}
-
-// Program exit heap cleanup registered as a module object destructor.
-// Will not get executed when we crash on a signal.
-//
-void HeapLeakChecker_RunHeapCleanups() {
-  if (FLAGS_heap_check == "local")   // don't check heap in this mode
-    return;
-  { SpinLockHolder l(&heap_checker_lock);
-    // can get here (via forks?) with other pids
-    if (heap_checker_pid != getpid()) return;
-  }
-  HeapCleaner::RunHeapCleanups();
-  if (!FLAGS_heap_check_after_destructors) HeapLeakChecker::DoMainHeapCheck();
-}
-
-static bool internal_init_start_has_run = false;
-
-// Called exactly once, before main() (but hopefully just before).
-// This picks a good unique name for the dumped leak checking heap profiles.
-//
-// Because we crash when InternalInitStart is called more than once,
-// it's fine that we hold heap_checker_lock only around pieces of
-// this function: this is still enough for thread-safety w.r.t. other functions
-// of this module.
-// We can't hold heap_checker_lock throughout because it would deadlock
-// on a memory allocation since our new/delete hooks can be on.
-//
-void HeapLeakChecker_InternalInitStart() {
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(!internal_init_start_has_run,
-              "Heap-check constructor called twice.  Perhaps you both linked"
-              " in the heap checker, and also used LD_PRELOAD to load it?");
-    internal_init_start_has_run = true;
-
-#ifdef ADDRESS_SANITIZER
-    // AddressSanitizer's custom malloc conflicts with HeapChecker.
-    FLAGS_heap_check = "";
-#endif
-
-    if (FLAGS_heap_check.empty()) {
-      // turns out we do not need checking in the end; can stop profiling
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    } else if (RunningOnValgrind()) {
-      // There is no point in trying -- we'll just fail.
-      RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off");
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    }
-  }
-
-  // Changing this to false can be useful when debugging heap-checker itself:
-  if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) {
-    RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off");
-    SpinLockHolder l(&heap_checker_lock);
-    HeapLeakChecker::TurnItselfOffLocked();
-    return;
-  }
-
-  { SpinLockHolder l(&heap_checker_lock);
-    if (!constructor_heap_profiling) {
-      RAW_LOG(FATAL, "Can not start so late. You have to enable heap checking "
-	             "with HEAPCHECK=<mode>.");
-    }
-  }
-
-  // Set all flags
-  RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
-  if (FLAGS_heap_check == "minimal") {
-    // The least we can check.
-    FLAGS_heap_check_before_constructors = false;  // from after main
-                                                   // (ignore more)
-    FLAGS_heap_check_after_destructors = false;  // to after cleanup
-                                                 // (most data is live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "normal") {
-    // Faster than 'minimal' and not much stricter.
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = false;  // to after cleanup
-                                                 // (most data is live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "strict") {
-    // A bit stricter than 'normal': global destructors must fully clean up
-    // after themselves if they are present.
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = true;  // to after destructors
-                                                // (less data live)
-    FLAGS_heap_check_ignore_thread_live = true;  // ignore all live
-    FLAGS_heap_check_ignore_global_live = true;  // ignore all live
-  } else if (FLAGS_heap_check == "draconian") {
-    // Drop not very portable and not very exact live heap flooding.
-    FLAGS_heap_check_before_constructors = true;  // from no profile (fast)
-    FLAGS_heap_check_after_destructors = true;  // to after destructors
-                                                // (need them)
-    FLAGS_heap_check_ignore_thread_live = false;  // no live flood (stricter)
-    FLAGS_heap_check_ignore_global_live = false;  // no live flood (stricter)
-  } else if (FLAGS_heap_check == "as-is") {
-    // do nothing: use other flags as is
-  } else if (FLAGS_heap_check == "local") {
-    // do nothing
-  } else {
-    RAW_LOG(FATAL, "Unsupported heap_check flag: %s",
-                   FLAGS_heap_check.c_str());
-  }
-  // FreeBSD doesn't seem to honor atexit execution order:
-  //    http://code.google.com/p/gperftools/issues/detail?id=375
-  // Since heap-checking before destructors depends on atexit running
-  // at the right time, on FreeBSD we always check after, even in the
-  // less strict modes.  This just means FreeBSD is always a bit
-  // stricter in its checking than other OSes.
-  // This now appears to be the case in other OSes as well;
-  // so always check afterwards.
-  FLAGS_heap_check_after_destructors = true;
-
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_DCHECK(heap_checker_pid == getpid(), "");
-    heap_checker_on = true;
-    RAW_DCHECK(heap_profile, "");
-    HeapLeakChecker::ProcMapsResult pm_result = HeapLeakChecker::UseProcMapsLocked(HeapLeakChecker::DISABLE_LIBRARY_ALLOCS);
-      // might neeed to do this more than once
-      // if one later dynamically loads libraries that we want disabled
-    if (pm_result != HeapLeakChecker::PROC_MAPS_USED) {  // can't function
-      HeapLeakChecker::TurnItselfOffLocked();
-      return;
-    }
-  }
-
-  // make a good place and name for heap profile leak dumps
-  string* profile_prefix =
-    new string(FLAGS_heap_check_dump_directory + "/" + invocation_name());
-
-  // Finalize prefix for dumping leak checking profiles.
-  const int32 our_pid = getpid();   // safest to call getpid() outside lock
-  { SpinLockHolder l(&heap_checker_lock);
-    // main_thread_pid might still be 0 if this function is being called before
-    // global constructors.  In that case, our pid *is* the main pid.
-    if (main_thread_pid == 0)
-      main_thread_pid = our_pid;
-  }
-  char pid_buf[15];
-  snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid);
-  *profile_prefix += pid_buf;
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_DCHECK(profile_name_prefix == NULL, "");
-    profile_name_prefix = profile_prefix;
-  }
-
-  // Make sure new/delete hooks are installed properly
-  // and heap profiler is indeed able to keep track
-  // of the objects being allocated.
-  // We test this to make sure we are indeed checking for leaks.
-  char* test_str = new char[5];
-  size_t size;
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(heap_profile->FindAlloc(test_str, &size),
-              "our own new/delete not linked?");
-  }
-  delete[] test_str;
-  { SpinLockHolder l(&heap_checker_lock);
-    // This check can fail when it should not if another thread allocates
-    // into this same spot right this moment,
-    // which is unlikely since this code runs in InitGoogle.
-    RAW_CHECK(!heap_profile->FindAlloc(test_str, &size),
-              "our own new/delete not linked?");
-  }
-  // If we crash in the above code, it probably means that
-  // "nm <this_binary> | grep new" will show that tcmalloc's new/delete
-  // implementation did not get linked-in into this binary
-  // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined).
-  // If this happens, it is a BUILD bug to be fixed.
-
-  RAW_VLOG(heap_checker_info_level,
-           "WARNING: Perftools heap leak checker is active "
-           "-- Performance may suffer");
-
-  if (FLAGS_heap_check != "local") {
-    HeapLeakChecker* main_hc = new HeapLeakChecker();
-    SpinLockHolder l(&heap_checker_lock);
-    RAW_DCHECK(main_heap_checker == NULL,
-               "Repeated creation of main_heap_checker");
-    main_heap_checker = main_hc;
-    do_main_heap_check = true;
-  }
-
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(heap_checker_on  &&  constructor_heap_profiling,
-              "Leak checking is expected to be fully turned on now");
-  }
-
-  // For binaries built in debug mode, this will set release queue of
-  // debugallocation.cc to 100M to make it less likely for real leaks to
-  // be hidden due to reuse of heap memory object addresses.
-  // Running a test with --malloc_reclaim_memory=0 would help find leaks even
-  // better, but the test might run out of memory as a result.
-  // The scenario is that a heap object at address X is allocated and freed,
-  // but some other data-structure still retains a pointer to X.
-  // Then the same heap memory is used for another object, which is leaked,
-  // but the leak is not noticed due to the pointer to the original object at X.
-  // TODO(csilvers): support this in some manner.
-#if 0
-  SetCommandLineOptionWithMode("max_free_queue_size", "104857600",  // 100M
-                               SET_FLAG_IF_DEFAULT);
-#endif
-}
-
-// We want this to run early as well, but not so early as
-// ::BeforeConstructors (we want flag assignments to have already
-// happened, for instance).  Initializer-registration does the trick.
-REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker_InternalInitStart());
-REGISTER_MODULE_DESTRUCTOR(init_start, HeapLeakChecker_RunHeapCleanups());
-
-// static
-bool HeapLeakChecker::NoGlobalLeaksMaybeSymbolize(
-    ShouldSymbolize should_symbolize) {
-  // we never delete or change main_heap_checker once it's set:
-  HeapLeakChecker* main_hc = GlobalChecker();
-  if (main_hc) {
-    RAW_VLOG(10, "Checking for whole-program memory leaks");
-    return main_hc->DoNoLeaks(should_symbolize);
-  }
-  return true;
-}
-
-// static
-bool HeapLeakChecker::DoMainHeapCheck() {
-  if (FLAGS_heap_check_delay_seconds > 0) {
-    sleep(FLAGS_heap_check_delay_seconds);
-  }
-  { SpinLockHolder l(&heap_checker_lock);
-    if (!do_main_heap_check) return false;
-    RAW_DCHECK(heap_checker_pid == getpid(), "");
-    do_main_heap_check = false;  // will do it now; no need to do it more
-  }
-
-  // The program is over, so it's safe to symbolize addresses (which
-  // requires a fork) because no serious work is expected to be done
-  // after this.  Symbolizing is really useful -- knowing what
-  // function has a leak is better than knowing just an address --
-  // and while we can only safely symbolize once in a program run,
-  // now is the time (after all, there's no "later" that would be better).
-  if (!NoGlobalLeaksMaybeSymbolize(SYMBOLIZE)) {
-    if (FLAGS_heap_check_identify_leaks) {
-      RAW_LOG(FATAL, "Whole-program memory leaks found.");
-    }
-    RAW_LOG(ERROR, "Exiting with error code (instead of crashing) "
-                   "because of whole-program memory leaks");
-    _exit(1);    // we don't want to call atexit() routines!
-  }
-  return true;
-}
-
-// static
-HeapLeakChecker* HeapLeakChecker::GlobalChecker() {
-  SpinLockHolder l(&heap_checker_lock);
-  return main_heap_checker;
-}
-
-// static
-bool HeapLeakChecker::NoGlobalLeaks() {
-  // symbolizing requires a fork, which isn't safe to do in general.
-  return NoGlobalLeaksMaybeSymbolize(DO_NOT_SYMBOLIZE);
-}
-
-// static
-void HeapLeakChecker::CancelGlobalCheck() {
-  SpinLockHolder l(&heap_checker_lock);
-  if (do_main_heap_check) {
-    RAW_VLOG(heap_checker_info_level,
-             "Canceling the automatic at-exit whole-program memory leak check");
-    do_main_heap_check = false;
-  }
-}
-
-// static
-void HeapLeakChecker::BeforeConstructorsLocked() {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_CHECK(!constructor_heap_profiling,
-            "BeforeConstructorsLocked called multiple times");
-#ifdef ADDRESS_SANITIZER
-  // AddressSanitizer's custom malloc conflicts with HeapChecker.
-  return;
-#endif
-  // Set hooks early to crash if 'new' gets called before we make heap_profile,
-  // and make sure no other hooks existed:
-  RAW_CHECK(MallocHook::AddNewHook(&NewHook), "");
-  RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), "");
-  constructor_heap_profiling = true;
-  MemoryRegionMap::Init(1, /* use_buckets */ false);
-    // Set up MemoryRegionMap with (at least) one caller stack frame to record
-    // (important that it's done before HeapProfileTable creation below).
-  Allocator::Init();
-  RAW_CHECK(heap_profile == NULL, "");
-  heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable)))
-      HeapProfileTable(&Allocator::Allocate, &Allocator::Free,
-                       /* profile_mmap */ false);
-  RAW_VLOG(10, "Starting tracking the heap");
-  heap_checker_on = true;
-}
-
-// static
-void HeapLeakChecker::TurnItselfOffLocked() {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  // Set FLAGS_heap_check to "", for users who test for it
-  if (!FLAGS_heap_check.empty())  // be a noop in the common case
-    FLAGS_heap_check.clear();     // because clear() could allocate memory
-  if (constructor_heap_profiling) {
-    RAW_CHECK(heap_checker_on, "");
-    RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off");
-    heap_checker_on = false;
-    // Unset our hooks checking they were set:
-    RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), "");
-    RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), "");
-    Allocator::DeleteAndNull(&heap_profile);
-    // free our optional global data:
-    Allocator::DeleteAndNullIfNot(&ignored_objects);
-    Allocator::DeleteAndNullIfNot(&disabled_ranges);
-    Allocator::DeleteAndNullIfNot(&global_region_caller_ranges);
-    Allocator::Shutdown();
-    MemoryRegionMap::Shutdown();
-  }
-  RAW_CHECK(!heap_checker_on, "");
-}
-
-extern bool heap_leak_checker_bcad_variable;  // in heap-checker-bcad.cc
-
-static bool has_called_before_constructors = false;
-
-// TODO(maxim): inline this function with
-// MallocHook_InitAtFirstAllocation_HeapLeakChecker, and also rename
-// HeapLeakChecker::BeforeConstructorsLocked.
-void HeapLeakChecker_BeforeConstructors() {
-  SpinLockHolder l(&heap_checker_lock);
-  // We can be called from several places: the first mmap/sbrk/alloc call
-  // or the first global c-tor from heap-checker-bcad.cc:
-  // Do not re-execute initialization:
-  if (has_called_before_constructors) return;
-  has_called_before_constructors = true;
-
-  heap_checker_pid = getpid();  // set it always
-  heap_leak_checker_bcad_variable = true;
-  // just to reference it, so that heap-checker-bcad.o is linked in
-
-  // This function can be called *very* early, before the normal
-  // global-constructor that sets FLAGS_verbose.  Set it manually now,
-  // so the RAW_LOG messages here are controllable.
-  const char* verbose_str = GetenvBeforeMain("PERFTOOLS_VERBOSE");
-  if (verbose_str && atoi(verbose_str)) {  // different than the default of 0?
-    FLAGS_verbose = atoi(verbose_str);
-  }
-
-  bool need_heap_check = true;
-  // The user indicates a desire for heap-checking via the HEAPCHECK
-  // environment variable.  If it's not set, there's no way to do
-  // heap-checking.
-  if (!GetenvBeforeMain("HEAPCHECK")) {
-    need_heap_check = false;
-  }
-#ifdef HAVE_GETEUID
-  if (need_heap_check && getuid() != geteuid()) {
-    // heap-checker writes out files.  Thus, for security reasons, we don't
-    // recognize the env. var. to turn on heap-checking if we're setuid.
-    RAW_LOG(WARNING, ("HeapChecker: ignoring HEAPCHECK because "
-                      "program seems to be setuid\n"));
-    need_heap_check = false;
-  }
-#endif
-  if (need_heap_check) {
-    HeapLeakChecker::BeforeConstructorsLocked();
-  }
-}
-
-// This function overrides the weak function defined in malloc_hook.cc and
-// called by one of the initial malloc hooks (malloc_hook.cc) when the very
-// first memory allocation or an mmap/sbrk happens.  This ensures that
-// HeapLeakChecker is initialized and installs all its hooks early enough to
-// track absolutely all memory allocations and all memory region acquisitions
-// via mmap and sbrk.
-extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
-  HeapLeakChecker_BeforeConstructors();
-}
-
-// This function is executed after all global object destructors run.
-void HeapLeakChecker_AfterDestructors() {
-  { SpinLockHolder l(&heap_checker_lock);
-    // can get here (via forks?) with other pids
-    if (heap_checker_pid != getpid()) return;
-  }
-  if (FLAGS_heap_check_after_destructors) {
-    if (HeapLeakChecker::DoMainHeapCheck()) {
-      const struct timespec sleep_time = { 0, 500000000 };  // 500 ms
-      nanosleep(&sleep_time, NULL);
-        // Need this hack to wait for other pthreads to exit.
-        // Otherwise tcmalloc find errors
-        // on a free() call from pthreads.
-    }
-  }
-  SpinLockHolder l(&heap_checker_lock);
-  RAW_CHECK(!do_main_heap_check, "should have done it");
-}
-
-//----------------------------------------------------------------------
-// HeapLeakChecker disabling helpers
-//----------------------------------------------------------------------
-
-// These functions are at the end of the file to prevent their inlining:
-
-// static
-void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
-                                                const void* end_address,
-                                                int max_depth) {
-  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  RAW_DCHECK(start_address < end_address, "");
-  if (disabled_ranges == NULL) {
-    disabled_ranges = new(Allocator::Allocate(sizeof(DisabledRangeMap)))
-                        DisabledRangeMap;
-  }
-  RangeValue value;
-  value.start_address = AsInt(start_address);
-  value.max_depth = max_depth;
-  if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) {
-    RAW_VLOG(10, "Disabling leak checking in stack traces "
-                "under frame addresses between %p..%p",
-                start_address, end_address);
-  } else {  // check that this is just a verbatim repetition
-    RangeValue const& val = disabled_ranges->find(AsInt(end_address))->second;
-    if (val.max_depth != value.max_depth  ||
-        val.start_address != value.start_address) {
-      RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: "
-                     "(%p, %p, %d) vs. (%p, %p, %d)",
-                     AsPtr(val.start_address), end_address, val.max_depth,
-                     start_address, end_address, max_depth);
-    }
-  }
-}
-
-// static
-inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
-                                              size_t* object_size) {
-  // Commented-out because HaveOnHeapLocked is very performance-critical:
-  // RAW_DCHECK(heap_checker_lock.IsHeld(), "");
-  const uintptr_t addr = AsInt(*ptr);
-  if (heap_profile->FindInsideAlloc(
-        *ptr, max_heap_object_size, ptr, object_size)) {
-    RAW_VLOG(16, "Got pointer into %p at +%" PRIuPTR " offset",
-             *ptr, addr - AsInt(*ptr));
-    return true;
-  }
-  return false;
-}
-
-// static
-const void* HeapLeakChecker::GetAllocCaller(void* ptr) {
-  // this is used only in the unittest, so the heavy checks are fine
-  HeapProfileTable::AllocInfo info;
-  { SpinLockHolder l(&heap_checker_lock);
-    RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), "");
-  }
-  RAW_CHECK(info.stack_depth >= 1, "");
-  return info.call_stack[0];
-}
diff --git a/contrib/libtcmalloc/src/heap-profile-stats.h b/contrib/libtcmalloc/src/heap-profile-stats.h
deleted file mode 100644
index ae45d5883fa..00000000000
--- a/contrib/libtcmalloc/src/heap-profile-stats.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2013, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This file defines structs to accumulate memory allocation and deallocation
-// counts.  These structs are commonly used for malloc (in HeapProfileTable)
-// and mmap (in MemoryRegionMap).
-
-// A bucket is data structure for heap profiling to store a pair of a stack
-// trace and counts of (de)allocation.  Buckets are stored in a hash table
-// which is declared as "HeapProfileBucket**".
-//
-// A hash value is computed from a stack trace.  Collision in the hash table
-// is resolved by separate chaining with linked lists.  The links in the list
-// are implemented with the member "HeapProfileBucket* next".
-//
-// A structure of a hash table HeapProfileBucket** bucket_table would be like:
-// bucket_table[0] => NULL
-// bucket_table[1] => HeapProfileBucket() => HeapProfileBucket() => NULL
-// ...
-// bucket_table[i] => HeapProfileBucket() => NULL
-// ...
-// bucket_table[n] => HeapProfileBucket() => NULL
-
-#ifndef HEAP_PROFILE_STATS_H_
-#define HEAP_PROFILE_STATS_H_
-
-struct HeapProfileStats {
-  // Returns true if the two HeapProfileStats are semantically equal.
-  bool Equivalent(const HeapProfileStats& other) const {
-    return allocs - frees == other.allocs - other.frees &&
-        alloc_size - free_size == other.alloc_size - other.free_size;
-  }
-
-  int32 allocs;      // Number of allocation calls.
-  int32 frees;       // Number of free calls.
-  int64 alloc_size;  // Total size of all allocated objects so far.
-  int64 free_size;   // Total size of all freed objects so far.
-};
-
-// Allocation and deallocation statistics per each stack trace.
-struct HeapProfileBucket : public HeapProfileStats {
-  // Longest stack trace we record.
-  static const int kMaxStackDepth = 32;
-
-  uintptr_t hash;           // Hash value of the stack trace.
-  int depth;                // Depth of stack trace.
-  const void** stack;       // Stack trace.
-  HeapProfileBucket* next;  // Next entry in hash-table.
-};
-
-#endif  // HEAP_PROFILE_STATS_H_
diff --git a/contrib/libtcmalloc/src/heap-profile-table.cc b/contrib/libtcmalloc/src/heap-profile-table.cc
deleted file mode 100644
index 7486468c056..00000000000
--- a/contrib/libtcmalloc/src/heap-profile-table.cc
+++ /dev/null
@@ -1,631 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//         Maxim Lifantsev (refactoring)
-//
-
-#include <config.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>   // for write()
-#endif
-#include <fcntl.h>    // for open()
-#ifdef HAVE_GLOB_H
-#include <glob.h>
-#ifndef GLOB_NOMATCH  // true on some old cygwins
-# define GLOB_NOMATCH 0
-#endif
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h> // for PRIxPTR
-#endif
-#ifdef HAVE_POLL_H
-#include <poll.h>
-#endif
-#include <errno.h>
-#include <stdarg.h>
-#include <string>
-#include <map>
-#include <algorithm>  // for sort(), equal(), and copy()
-
-#include "heap-profile-table.h"
-
-#include "base/logging.h"
-#include "raw_printer.h"
-#include "symbolize.h"
-#include <gperftools/stacktrace.h>
-#include <gperftools/malloc_hook.h>
-#include "memory_region_map.h"
-#include "base/commandlineflags.h"
-#include "base/logging.h"    // for the RawFD I/O commands
-#include "base/sysinfo.h"
-
-using std::sort;
-using std::equal;
-using std::copy;
-using std::string;
-using std::map;
-
-using tcmalloc::FillProcSelfMaps;   // from sysinfo.h
-using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h
-
-//----------------------------------------------------------------------
-
-DEFINE_bool(cleanup_old_heap_profiles,
-            EnvToBool("HEAP_PROFILE_CLEANUP", true),
-            "At initialization time, delete old heap profiles.");
-
-DEFINE_int32(heap_check_max_leaks,
-             EnvToInt("HEAP_CHECK_MAX_LEAKS", 20),
-             "The maximum number of leak reports to print.");
-
-//----------------------------------------------------------------------
-
-// header of the dumped heap profile
-static const char kProfileHeader[] = "heap profile: ";
-static const char kProcSelfMapsHeader[] = "\nMAPPED_LIBRARIES:\n";
-
-//----------------------------------------------------------------------
-
-const char HeapProfileTable::kFileExt[] = ".heap";
-
-//----------------------------------------------------------------------
-
-static const int kHashTableSize = 179999;   // Size for bucket_table_.
-/*static*/ const int HeapProfileTable::kMaxStackDepth;
-
-//----------------------------------------------------------------------
-
-// We strip out different number of stack frames in debug mode
-// because less inlining happens in that case
-#ifdef NDEBUG
-static const int kStripFrames = 2;
-#else
-static const int kStripFrames = 3;
-#endif
-
-// For sorting Stats or Buckets by in-use space
-static bool ByAllocatedSpace(HeapProfileTable::Stats* a,
-                             HeapProfileTable::Stats* b) {
-  // Return true iff "a" has more allocated space than "b"
-  return (a->alloc_size - a->free_size) > (b->alloc_size - b->free_size);
-}
-
-//----------------------------------------------------------------------
-
-HeapProfileTable::HeapProfileTable(Allocator alloc,
-                                   DeAllocator dealloc,
-                                   bool profile_mmap)
-    : alloc_(alloc),
-      dealloc_(dealloc),
-      profile_mmap_(profile_mmap),
-      bucket_table_(NULL),
-      num_buckets_(0),
-      address_map_(NULL) {
-  // Make a hash table for buckets.
-  const int table_bytes = kHashTableSize * sizeof(*bucket_table_);
-  bucket_table_ = static_cast<Bucket**>(alloc_(table_bytes));
-  memset(bucket_table_, 0, table_bytes);
-
-  // Make an allocation map.
-  address_map_ =
-      new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
-
-  // Initialize.
-  memset(&total_, 0, sizeof(total_));
-  num_buckets_ = 0;
-}
-
-HeapProfileTable::~HeapProfileTable() {
-  // Free the allocation map.
-  address_map_->~AllocationMap();
-  dealloc_(address_map_);
-  address_map_ = NULL;
-
-  // Free the hash table.
-  for (int i = 0; i < kHashTableSize; i++) {
-    for (Bucket* curr = bucket_table_[i]; curr != 0; /**/) {
-      Bucket* bucket = curr;
-      curr = curr->next;
-      dealloc_(bucket->stack);
-      dealloc_(bucket);
-    }
-  }
-  dealloc_(bucket_table_);
-  bucket_table_ = NULL;
-}
-
-HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
-                                                      const void* const key[]) {
-  // Make hash-value
-  uintptr_t h = 0;
-  for (int i = 0; i < depth; i++) {
-    h += reinterpret_cast<uintptr_t>(key[i]);
-    h += h << 10;
-    h ^= h >> 6;
-  }
-  h += h << 3;
-  h ^= h >> 11;
-
-  // Lookup stack trace in table
-  unsigned int buck = ((unsigned int) h) % kHashTableSize;
-  for (Bucket* b = bucket_table_[buck]; b != 0; b = b->next) {
-    if ((b->hash == h) &&
-        (b->depth == depth) &&
-        equal(key, key + depth, b->stack)) {
-      return b;
-    }
-  }
-
-  // Create new bucket
-  const size_t key_size = sizeof(key[0]) * depth;
-  const void** kcopy = reinterpret_cast<const void**>(alloc_(key_size));
-  copy(key, key + depth, kcopy);
-  Bucket* b = reinterpret_cast<Bucket*>(alloc_(sizeof(Bucket)));
-  memset(b, 0, sizeof(*b));
-  b->hash  = h;
-  b->depth = depth;
-  b->stack = kcopy;
-  b->next  = bucket_table_[buck];
-  bucket_table_[buck] = b;
-  num_buckets_++;
-  return b;
-}
-
-int HeapProfileTable::GetCallerStackTrace(
-    int skip_count, void* stack[kMaxStackDepth]) {
-  return MallocHook::GetCallerStackTrace(
-      stack, kMaxStackDepth, kStripFrames + skip_count + 1);
-}
-
-void HeapProfileTable::RecordAlloc(
-    const void* ptr, size_t bytes, int stack_depth,
-    const void* const call_stack[]) {
-  Bucket* b = GetBucket(stack_depth, call_stack);
-  b->allocs++;
-  b->alloc_size += bytes;
-  total_.allocs++;
-  total_.alloc_size += bytes;
-
-  AllocValue v;
-  v.set_bucket(b);  // also did set_live(false); set_ignore(false)
-  v.bytes = bytes;
-  address_map_->Insert(ptr, v);
-}
-
-void HeapProfileTable::RecordFree(const void* ptr) {
-  AllocValue v;
-  if (address_map_->FindAndRemove(ptr, &v)) {
-    Bucket* b = v.bucket();
-    b->frees++;
-    b->free_size += v.bytes;
-    total_.frees++;
-    total_.free_size += v.bytes;
-  }
-}
-
-bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
-  const AllocValue* alloc_value = address_map_->Find(ptr);
-  if (alloc_value != NULL) *object_size = alloc_value->bytes;
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::FindAllocDetails(const void* ptr,
-                                        AllocInfo* info) const {
-  const AllocValue* alloc_value = address_map_->Find(ptr);
-  if (alloc_value != NULL) {
-    info->object_size = alloc_value->bytes;
-    info->call_stack = alloc_value->bucket()->stack;
-    info->stack_depth = alloc_value->bucket()->depth;
-  }
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::FindInsideAlloc(const void* ptr,
-                                       size_t max_size,
-                                       const void** object_ptr,
-                                       size_t* object_size) const {
-  const AllocValue* alloc_value =
-    address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
-  if (alloc_value != NULL) *object_size = alloc_value->bytes;
-  return alloc_value != NULL;
-}
-
-bool HeapProfileTable::MarkAsLive(const void* ptr) {
-  AllocValue* alloc = address_map_->FindMutable(ptr);
-  if (alloc && !alloc->live()) {
-    alloc->set_live(true);
-    return true;
-  }
-  return false;
-}
-
-void HeapProfileTable::MarkAsIgnored(const void* ptr) {
-  AllocValue* alloc = address_map_->FindMutable(ptr);
-  if (alloc) {
-    alloc->set_ignore(true);
-  }
-}
-
-// We'd be happier using snprintfer, but we don't to reduce dependencies.
-int HeapProfileTable::UnparseBucket(const Bucket& b,
-                                    char* buf, int buflen, int bufsize,
-                                    const char* extra,
-                                    Stats* profile_stats) {
-  if (profile_stats != NULL) {
-    profile_stats->allocs += b.allocs;
-    profile_stats->alloc_size += b.alloc_size;
-    profile_stats->frees += b.frees;
-    profile_stats->free_size += b.free_size;
-  }
-  int printed =
-    snprintf(buf + buflen, bufsize - buflen, "%6d: %8" PRId64 " [%6d: %8" PRId64 "] @%s",
-             b.allocs - b.frees,
-             b.alloc_size - b.free_size,
-             b.allocs,
-             b.alloc_size,
-             extra);
-  // If it looks like the snprintf failed, ignore the fact we printed anything
-  if (printed < 0 || printed >= bufsize - buflen) return buflen;
-  buflen += printed;
-  for (int d = 0; d < b.depth; d++) {
-    printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08" PRIxPTR,
-                       reinterpret_cast<uintptr_t>(b.stack[d]));
-    if (printed < 0 || printed >= bufsize - buflen) return buflen;
-    buflen += printed;
-  }
-  printed = snprintf(buf + buflen, bufsize - buflen, "\n");
-  if (printed < 0 || printed >= bufsize - buflen) return buflen;
-  buflen += printed;
-  return buflen;
-}
-
-HeapProfileTable::Bucket**
-HeapProfileTable::MakeSortedBucketList() const {
-  Bucket** list = static_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
-
-  int bucket_count = 0;
-  for (int i = 0; i < kHashTableSize; i++) {
-    for (Bucket* curr = bucket_table_[i]; curr != 0; curr = curr->next) {
-      list[bucket_count++] = curr;
-    }
-  }
-  RAW_DCHECK(bucket_count == num_buckets_, "");
-
-  sort(list, list + num_buckets_, ByAllocatedSpace);
-
-  return list;
-}
-
-void HeapProfileTable::IterateOrderedAllocContexts(
-    AllocContextIterator callback) const {
-  Bucket** list = MakeSortedBucketList();
-  AllocContextInfo info;
-  for (int i = 0; i < num_buckets_; ++i) {
-    *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]);
-    info.stack_depth = list[i]->depth;
-    info.call_stack = list[i]->stack;
-    callback(info);
-  }
-  dealloc_(list);
-}
-
-int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
-  Bucket** list = MakeSortedBucketList();
-
-  // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info".
-  // In the cases buf is too small, we'd rather leave out the last
-  // buckets than leave out the /proc/self/maps info.  To ensure that,
-  // we actually print the /proc/self/maps info first, then move it to
-  // the end of the buffer, then write the bucket info into whatever
-  // is remaining, and then move the maps info one last time to close
-  // any gaps.  Whew!
-  int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader);
-  if (map_length < 0 || map_length >= size) {
-      dealloc_(list);
-      return 0;
-  }
-  bool dummy;   // "wrote_all" -- did /proc/self/maps fit in its entirety?
-  map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy);
-  RAW_DCHECK(map_length <= size, "");
-  char* const map_start = buf + size - map_length;      // move to end
-  memmove(map_start, buf, map_length);
-  size -= map_length;
-
-  Stats stats;
-  memset(&stats, 0, sizeof(stats));
-  int bucket_length = snprintf(buf, size, "%s", kProfileHeader);
-  if (bucket_length < 0 || bucket_length >= size) {
-      dealloc_(list);
-      return 0;
-  }
-  bucket_length = UnparseBucket(total_, buf, bucket_length, size,
-                                " heapprofile", &stats);
-
-  // Dump the mmap list first.
-  if (profile_mmap_) {
-    BufferArgs buffer(buf, bucket_length, size);
-    MemoryRegionMap::IterateBuckets<BufferArgs*>(DumpBucketIterator, &buffer);
-    bucket_length = buffer.buflen;
-  }
-
-  for (int i = 0; i < num_buckets_; i++) {
-    bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "",
-                                  &stats);
-  }
-  RAW_DCHECK(bucket_length < size, "");
-
-  dealloc_(list);
-
-  RAW_DCHECK(buf + bucket_length <= map_start, "");
-  memmove(buf + bucket_length, map_start, map_length);  // close the gap
-
-  return bucket_length + map_length;
-}
-
-// static
-void HeapProfileTable::DumpBucketIterator(const Bucket* bucket,
-                                          BufferArgs* args) {
-  args->buflen = UnparseBucket(*bucket, args->buf, args->buflen, args->bufsize,
-                               "", NULL);
-}
-
-inline
-void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v,
-                                           const DumpArgs& args) {
-  if (v->live()) {
-    v->set_live(false);
-    return;
-  }
-  if (v->ignore()) {
-    return;
-  }
-  Bucket b;
-  memset(&b, 0, sizeof(b));
-  b.allocs = 1;
-  b.alloc_size = v->bytes;
-  b.depth = v->bucket()->depth;
-  b.stack = v->bucket()->stack;
-  char buf[1024];
-  int len = UnparseBucket(b, buf, 0, sizeof(buf), "", args.profile_stats);
-  RawWrite(args.fd, buf, len);
-}
-
-// Callback from NonLiveSnapshot; adds entry to arg->dest
-// if not the entry is not live and is not present in arg->base.
-void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v,
-                                    AddNonLiveArgs* arg) {
-  if (v->live()) {
-    v->set_live(false);
-  } else {
-    if (arg->base != NULL && arg->base->map_.Find(ptr) != NULL) {
-      // Present in arg->base, so do not save
-    } else {
-      arg->dest->Add(ptr, *v);
-    }
-  }
-}
-
-bool HeapProfileTable::WriteProfile(const char* file_name,
-                                    const Bucket& total,
-                                    AllocationMap* allocations) {
-  RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name);
-  RawFD fd = RawOpenForWriting(file_name);
-  if (fd != kIllegalRawFD) {
-    RawWrite(fd, kProfileHeader, strlen(kProfileHeader));
-    char buf[512];
-    int len = UnparseBucket(total, buf, 0, sizeof(buf), " heapprofile",
-                            NULL);
-    RawWrite(fd, buf, len);
-    const DumpArgs args(fd, NULL);
-    allocations->Iterate<const DumpArgs&>(DumpNonLiveIterator, args);
-    RawWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader));
-    DumpProcSelfMaps(fd);
-    RawClose(fd);
-    return true;
-  } else {
-    RAW_LOG(ERROR, "Failed dumping filtered heap profile to %s", file_name);
-    return false;
-  }
-}
-
-void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
-  if (!FLAGS_cleanup_old_heap_profiles)
-    return;
-  string pattern = string(prefix) + ".*" + kFileExt;
-#if defined(HAVE_GLOB_H)
-  glob_t g;
-  const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g);
-  if (r == 0 || r == GLOB_NOMATCH) {
-    const int prefix_length = strlen(prefix);
-    for (int i = 0; i < g.gl_pathc; i++) {
-      const char* fname = g.gl_pathv[i];
-      if ((strlen(fname) >= prefix_length) &&
-          (memcmp(fname, prefix, prefix_length) == 0)) {
-        RAW_VLOG(1, "Removing old heap profile %s", fname);
-        unlink(fname);
-      }
-    }
-  }
-  globfree(&g);
-#else   /* HAVE_GLOB_H */
-  RAW_LOG(WARNING, "Unable to remove old heap profiles (can't run glob())");
-#endif
-}
-
-HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() {
-  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
-  address_map_->Iterate(AddToSnapshot, s);
-  return s;
-}
-
-void HeapProfileTable::ReleaseSnapshot(Snapshot* s) {
-  s->~Snapshot();
-  dealloc_(s);
-}
-
-// Callback from TakeSnapshot; adds a single entry to snapshot
-void HeapProfileTable::AddToSnapshot(const void* ptr, AllocValue* v,
-                                     Snapshot* snapshot) {
-  snapshot->Add(ptr, *v);
-}
-
-HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot(
-    Snapshot* base) {
-  RAW_VLOG(2, "NonLiveSnapshot input: %d %d\n",
-           int(total_.allocs - total_.frees),
-           int(total_.alloc_size - total_.free_size));
-
-  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
-  AddNonLiveArgs args;
-  args.dest = s;
-  args.base = base;
-  address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
-  RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n",
-           int(s->total_.allocs - s->total_.frees),
-           int(s->total_.alloc_size - s->total_.free_size));
-  return s;
-}
-
-// Information kept per unique bucket seen
-struct HeapProfileTable::Snapshot::Entry {
-  int count;
-  int bytes;
-  Bucket* bucket;
-  Entry() : count(0), bytes(0) { }
-
-  // Order by decreasing bytes
-  bool operator<(const Entry& x) const {
-    return this->bytes > x.bytes;
-  }
-};
-
-// State used to generate leak report.  We keep a mapping from Bucket pointer
-// the collected stats for that bucket.
-struct HeapProfileTable::Snapshot::ReportState {
-  map<Bucket*, Entry> buckets_;
-};
-
-// Callback from ReportLeaks; updates ReportState.
-void HeapProfileTable::Snapshot::ReportCallback(const void* ptr,
-                                                AllocValue* v,
-                                                ReportState* state) {
-  Entry* e = &state->buckets_[v->bucket()]; // Creates empty Entry first time
-  e->bucket = v->bucket();
-  e->count++;
-  e->bytes += v->bytes;
-}
-
-void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name,
-                                             const char* filename,
-                                             bool should_symbolize) {
-  // This is only used by the heap leak checker, but is intimately
-  // tied to the allocation map that belongs in this module and is
-  // therefore placed here.
-  RAW_LOG(ERROR, "Leak check %s detected leaks of %" PRIuS " bytes "
-          "in %" PRIuS " objects",
-          checker_name,
-          size_t(total_.alloc_size),
-          size_t(total_.allocs));
-
-  // Group objects by Bucket
-  ReportState state;
-  map_.Iterate(&ReportCallback, &state);
-
-  // Sort buckets by decreasing leaked size
-  const int n = state.buckets_.size();
-  Entry* entries = new Entry[n];
-  int dst = 0;
-  for (map<Bucket*,Entry>::const_iterator iter = state.buckets_.begin();
-       iter != state.buckets_.end();
-       ++iter) {
-    entries[dst++] = iter->second;
-  }
-  sort(entries, entries + n);
-
-  // Report a bounded number of leaks to keep the leak report from
-  // growing too long.
-  const int to_report =
-      (FLAGS_heap_check_max_leaks > 0 &&
-       n > FLAGS_heap_check_max_leaks) ? FLAGS_heap_check_max_leaks : n;
-  RAW_LOG(ERROR, "The %d largest leaks:", to_report);
-
-  // Print
-  SymbolTable symbolization_table;
-  for (int i = 0; i < to_report; i++) {
-    const Entry& e = entries[i];
-    for (int j = 0; j < e.bucket->depth; j++) {
-      symbolization_table.Add(e.bucket->stack[j]);
-    }
-  }
-  static const int kBufSize = 2<<10;
-  char buffer[kBufSize];
-  if (should_symbolize)
-    symbolization_table.Symbolize();
-  for (int i = 0; i < to_report; i++) {
-    const Entry& e = entries[i];
-    base::RawPrinter printer(buffer, kBufSize);
-    printer.Printf("Leak of %d bytes in %d objects allocated from:\n",
-                   e.bytes, e.count);
-    for (int j = 0; j < e.bucket->depth; j++) {
-      const void* pc = e.bucket->stack[j];
-      printer.Printf("\t@ %" PRIxPTR " %s\n",
-          reinterpret_cast<uintptr_t>(pc), symbolization_table.GetSymbol(pc));
-    }
-    RAW_LOG(ERROR, "%s", buffer);
-  }
-
-  if (to_report < n) {
-    RAW_LOG(ERROR, "Skipping leaks numbered %d..%d",
-            to_report, n-1);
-  }
-  delete[] entries;
-
-  // TODO: Dump the sorted Entry list instead of dumping raw data?
-  // (should be much shorter)
-  if (!HeapProfileTable::WriteProfile(filename, total_, &map_)) {
-    RAW_LOG(ERROR, "Could not write pprof profile to %s", filename);
-  }
-}
-
-void HeapProfileTable::Snapshot::ReportObject(const void* ptr,
-                                              AllocValue* v,
-                                              char* unused) {
-  // Perhaps also log the allocation stack trace (unsymbolized)
-  // on this line in case somebody finds it useful.
-  RAW_LOG(ERROR, "leaked %" PRIuS " byte object %p", v->bytes, ptr);
-}
-
-void HeapProfileTable::Snapshot::ReportIndividualObjects() {
-  char unused;
-  map_.Iterate(ReportObject, &unused);
-}
diff --git a/contrib/libtcmalloc/src/heap-profile-table.h b/contrib/libtcmalloc/src/heap-profile-table.h
deleted file mode 100644
index 3c6284741af..00000000000
--- a/contrib/libtcmalloc/src/heap-profile-table.h
+++ /dev/null
@@ -1,399 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2006, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//         Maxim Lifantsev (refactoring)
-//
-
-#ifndef BASE_HEAP_PROFILE_TABLE_H_
-#define BASE_HEAP_PROFILE_TABLE_H_
-
-#include "addressmap-inl.h"
-#include "base/basictypes.h"
-#include "base/logging.h"   // for RawFD
-#include "heap-profile-stats.h"
-
-// Table to maintain a heap profile data inside,
-// i.e. the set of currently active heap memory allocations.
-// thread-unsafe and non-reentrant code:
-// each instance object must be used by one thread
-// at a time w/o self-recursion.
-//
-// TODO(maxim): add a unittest for this class.
-class HeapProfileTable {
- public:
-
-  // Extension to be used for heap pforile files.
-  static const char kFileExt[];
-
-  // Longest stack trace we record.
-  static const int kMaxStackDepth = 32;
-
-  // data types ----------------------------
-
-  // Profile stats.
-  typedef HeapProfileStats Stats;
-
-  // Info we can return about an allocation.
-  struct AllocInfo {
-    size_t object_size;  // size of the allocation
-    const void* const* call_stack;  // call stack that made the allocation call
-    int stack_depth;  // depth of call_stack
-    bool live;
-    bool ignored;
-  };
-
-  // Info we return about an allocation context.
-  // An allocation context is a unique caller stack trace
-  // of an allocation operation.
-  struct AllocContextInfo : public Stats {
-    int stack_depth;                // Depth of stack trace
-    const void* const* call_stack;  // Stack trace
-  };
-
-  // Memory (de)allocator interface we'll use.
-  typedef void* (*Allocator)(size_t size);
-  typedef void  (*DeAllocator)(void* ptr);
-
-  // interface ---------------------------
-
-  HeapProfileTable(Allocator alloc, DeAllocator dealloc, bool profile_mmap);
-  ~HeapProfileTable();
-
-  // Collect the stack trace for the function that asked to do the
-  // allocation for passing to RecordAlloc() below.
-  //
-  // The stack trace is stored in 'stack'. The stack depth is returned.
-  //
-  // 'skip_count' gives the number of stack frames between this call
-  // and the memory allocation function.
-  static int GetCallerStackTrace(int skip_count, void* stack[kMaxStackDepth]);
-
-  // Record an allocation at 'ptr' of 'bytes' bytes.  'stack_depth'
-  // and 'call_stack' identifying the function that requested the
-  // allocation. They can be generated using GetCallerStackTrace() above.
-  void RecordAlloc(const void* ptr, size_t bytes,
-                   int stack_depth, const void* const call_stack[]);
-
-  // Record the deallocation of memory at 'ptr'.
-  void RecordFree(const void* ptr);
-
-  // Return true iff we have recorded an allocation at 'ptr'.
-  // If yes, fill *object_size with the allocation byte size.
-  bool FindAlloc(const void* ptr, size_t* object_size) const;
-  // Same as FindAlloc, but fills all of *info.
-  bool FindAllocDetails(const void* ptr, AllocInfo* info) const;
-
-  // Return true iff "ptr" points into a recorded allocation
-  // If yes, fill *object_ptr with the actual allocation address
-  // and *object_size with the allocation byte size.
-  // max_size specifies largest currently possible allocation size.
-  bool FindInsideAlloc(const void* ptr, size_t max_size,
-                       const void** object_ptr, size_t* object_size) const;
-
-  // If "ptr" points to a recorded allocation and it's not marked as live
-  // mark it as live and return true. Else return false.
-  // All allocations start as non-live.
-  bool MarkAsLive(const void* ptr);
-
-  // If "ptr" points to a recorded allocation, mark it as "ignored".
-  // Ignored objects are treated like other objects, except that they
-  // are skipped in heap checking reports.
-  void MarkAsIgnored(const void* ptr);
-
-  // Return current total (de)allocation statistics.  It doesn't contain
-  // mmap'ed regions.
-  const Stats& total() const { return total_; }
-
-  // Allocation data iteration callback: gets passed object pointer and
-  // fully-filled AllocInfo.
-  typedef void (*AllocIterator)(const void* ptr, const AllocInfo& info);
-
-  // Iterate over the allocation profile data calling "callback"
-  // for every allocation.
-  void IterateAllocs(AllocIterator callback) const {
-    address_map_->Iterate(MapArgsAllocIterator, callback);
-  }
-
-  // Allocation context profile data iteration callback
-  typedef void (*AllocContextIterator)(const AllocContextInfo& info);
-
-  // Iterate over the allocation context profile data calling "callback"
-  // for every allocation context. Allocation contexts are ordered by the
-  // size of allocated space.
-  void IterateOrderedAllocContexts(AllocContextIterator callback) const;
-
-  // Fill profile data into buffer 'buf' of size 'size'
-  // and return the actual size occupied by the dump in 'buf'.
-  // The profile buckets are dumped in the decreasing order
-  // of currently allocated bytes.
-  // We do not provision for 0-terminating 'buf'.
-  int FillOrderedProfile(char buf[], int size) const;
-
-  // Cleanup any old profile files matching prefix + ".*" + kFileExt.
-  static void CleanupOldProfiles(const char* prefix);
-
-  // Return a snapshot of the current contents of *this.
-  // Caller must call ReleaseSnapshot() on result when no longer needed.
-  // The result is only valid while this exists and until
-  // the snapshot is discarded by calling ReleaseSnapshot().
-  class Snapshot;
-  Snapshot* TakeSnapshot();
-
-  // Release a previously taken snapshot.  snapshot must not
-  // be used after this call.
-  void ReleaseSnapshot(Snapshot* snapshot);
-
-  // Return a snapshot of every non-live, non-ignored object in *this.
-  // If "base" is non-NULL, skip any objects present in "base".
-  // As a side-effect, clears the "live" bit on every live object in *this.
-  // Caller must call ReleaseSnapshot() on result when no longer needed.
-  Snapshot* NonLiveSnapshot(Snapshot* base);
-
- private:
-
-  // data types ----------------------------
-
-  // Hash table bucket to hold (de)allocation stats
-  // for a given allocation call stack trace.
-  typedef HeapProfileBucket Bucket;
-
-  // Info stored in the address map
-  struct AllocValue {
-    // Access to the stack-trace bucket
-    Bucket* bucket() const {
-      return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(kMask));
-    }
-    // This also does set_live(false).
-    void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); }
-    size_t  bytes;   // Number of bytes in this allocation
-
-    // Access to the allocation liveness flag (for leak checking)
-    bool live() const { return bucket_rep & kLive; }
-    void set_live(bool l) {
-      bucket_rep = (bucket_rep & ~uintptr_t(kLive)) | (l ? kLive : 0);
-    }
-
-    // Should this allocation be ignored if it looks like a leak?
-    bool ignore() const { return bucket_rep & kIgnore; }
-    void set_ignore(bool r) {
-      bucket_rep = (bucket_rep & ~uintptr_t(kIgnore)) | (r ? kIgnore : 0);
-    }
-
-   private:
-    // We store a few bits in the bottom bits of bucket_rep.
-    // (Alignment is at least four, so we have at least two bits.)
-    static const int kLive = 1;
-    static const int kIgnore = 2;
-    static const int kMask = kLive | kIgnore;
-
-    uintptr_t bucket_rep;
-  };
-
-  // helper for FindInsideAlloc
-  static size_t AllocValueSize(const AllocValue& v) { return v.bytes; }
-
-  typedef AddressMap<AllocValue> AllocationMap;
-
-  // Arguments that need to be passed DumpBucketIterator callback below.
-  struct BufferArgs {
-    BufferArgs(char* buf_arg, int buflen_arg, int bufsize_arg)
-        : buf(buf_arg),
-          buflen(buflen_arg),
-          bufsize(bufsize_arg) {
-    }
-
-    char* buf;
-    int buflen;
-    int bufsize;
-
-    DISALLOW_COPY_AND_ASSIGN(BufferArgs);
-  };
-
-  // Arguments that need to be passed DumpNonLiveIterator callback below.
-  struct DumpArgs {
-    DumpArgs(RawFD fd_arg, Stats* profile_stats_arg)
-        : fd(fd_arg),
-          profile_stats(profile_stats_arg) {
-    }
-
-    RawFD fd;  // file to write to
-    Stats* profile_stats;  // stats to update (may be NULL)
-  };
-
-  // helpers ----------------------------
-
-  // Unparse bucket b and print its portion of profile dump into buf.
-  // We return the amount of space in buf that we use.  We start printing
-  // at buf + buflen, and promise not to go beyond buf + bufsize.
-  // We do not provision for 0-terminating 'buf'.
-  //
-  // If profile_stats is non-NULL, we update *profile_stats by
-  // counting bucket b.
-  //
-  // "extra" is appended to the unparsed bucket.  Typically it is empty,
-  // but may be set to something like " heapprofile" for the total
-  // bucket to indicate the type of the profile.
-  static int UnparseBucket(const Bucket& b,
-                           char* buf, int buflen, int bufsize,
-                           const char* extra,
-                           Stats* profile_stats);
-
-  // Get the bucket for the caller stack trace 'key' of depth 'depth'
-  // creating the bucket if needed.
-  Bucket* GetBucket(int depth, const void* const key[]);
-
-  // Helper for IterateAllocs to do callback signature conversion
-  // from AllocationMap::Iterate to AllocIterator.
-  static void MapArgsAllocIterator(const void* ptr, AllocValue* v,
-                                   AllocIterator callback) {
-    AllocInfo info;
-    info.object_size = v->bytes;
-    info.call_stack = v->bucket()->stack;
-    info.stack_depth = v->bucket()->depth;
-    info.live = v->live();
-    info.ignored = v->ignore();
-    callback(ptr, info);
-  }
-
-  // Helper to dump a bucket.
-  inline static void DumpBucketIterator(const Bucket* bucket,
-                                        BufferArgs* args);
-
-  // Helper for DumpNonLiveProfile to do object-granularity
-  // heap profile dumping. It gets passed to AllocationMap::Iterate.
-  inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v,
-                                         const DumpArgs& args);
-
-  // Helper for IterateOrderedAllocContexts and FillOrderedProfile.
-  // Creates a sorted list of Buckets whose length is num_buckets_.
-  // The caller is responsible for deallocating the returned list.
-  Bucket** MakeSortedBucketList() const;
-
-  // Helper for TakeSnapshot.  Saves object to snapshot.
-  static void AddToSnapshot(const void* ptr, AllocValue* v, Snapshot* s);
-
-  // Arguments passed to AddIfNonLive
-  struct AddNonLiveArgs {
-    Snapshot* dest;
-    Snapshot* base;
-  };
-
-  // Helper for NonLiveSnapshot.  Adds the object to the destination
-  // snapshot if it is non-live.
-  static void AddIfNonLive(const void* ptr, AllocValue* v,
-                           AddNonLiveArgs* arg);
-
-  // Write contents of "*allocations" as a heap profile to
-  // "file_name".  "total" must contain the total of all entries in
-  // "*allocations".
-  static bool WriteProfile(const char* file_name,
-                           const Bucket& total,
-                           AllocationMap* allocations);
-
-  // data ----------------------------
-
-  // Memory (de)allocator that we use.
-  Allocator alloc_;
-  DeAllocator dealloc_;
-
-  // Overall profile stats; we use only the Stats part,
-  // but make it a Bucket to pass to UnparseBucket.
-  Bucket total_;
-
-  bool profile_mmap_;
-
-  // Bucket hash table for malloc.
-  // We hand-craft one instead of using one of the pre-written
-  // ones because we do not want to use malloc when operating on the table.
-  // It is only few lines of code, so no big deal.
-  Bucket** bucket_table_;
-  int num_buckets_;
-
-  // Map of all currently allocated objects and mapped regions we know about.
-  AllocationMap* address_map_;
-
-  DISALLOW_COPY_AND_ASSIGN(HeapProfileTable);
-};
-
-class HeapProfileTable::Snapshot {
- public:
-  const Stats& total() const { return total_; }
-
-  // Report anything in this snapshot as a leak.
-  // May use new/delete for temporary storage.
-  // If should_symbolize is true, will fork (which is not threadsafe)
-  // to turn addresses into symbol names.  Set to false for maximum safety.
-  // Also writes a heap profile to "filename" that contains
-  // all of the objects in this snapshot.
-  void ReportLeaks(const char* checker_name, const char* filename,
-                   bool should_symbolize);
-
-  // Report the addresses of all leaked objects.
-  // May use new/delete for temporary storage.
-  void ReportIndividualObjects();
-
-  bool Empty() const {
-    return (total_.allocs == 0) && (total_.alloc_size == 0);
-  }
-
- private:
-  friend class HeapProfileTable;
-
-  // Total count/size are stored in a Bucket so we can reuse UnparseBucket
-  Bucket total_;
-
-  // We share the Buckets managed by the parent table, but have our
-  // own object->bucket map.
-  AllocationMap map_;
-
-  Snapshot(Allocator alloc, DeAllocator dealloc) : map_(alloc, dealloc) {
-    memset(&total_, 0, sizeof(total_));
-  }
-
-  // Callback used to populate a Snapshot object with entries found
-  // in another allocation map.
-  inline void Add(const void* ptr, const AllocValue& v) {
-    map_.Insert(ptr, v);
-    total_.allocs++;
-    total_.alloc_size += v.bytes;
-  }
-
-  // Helpers for sorting and generating leak reports
-  struct Entry;
-  struct ReportState;
-  static void ReportCallback(const void* ptr, AllocValue* v, ReportState*);
-  static void ReportObject(const void* ptr, AllocValue* v, char*);
-
-  DISALLOW_COPY_AND_ASSIGN(Snapshot);
-};
-
-#endif  // BASE_HEAP_PROFILE_TABLE_H_
diff --git a/contrib/libtcmalloc/src/internal_logging.cc b/contrib/libtcmalloc/src/internal_logging.cc
deleted file mode 100644
index 3b2d0cb80f9..00000000000
--- a/contrib/libtcmalloc/src/internal_logging.cc
+++ /dev/null
@@ -1,192 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-#include "internal_logging.h"
-#include <stdarg.h>                     // for va_end, va_start
-#include <stdio.h>                      // for vsnprintf, va_list, etc
-#include <stdlib.h>                     // for abort
-#include <string.h>                     // for strlen, memcpy
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>    // for write()
-#endif
-
-#include <gperftools/malloc_extension.h>
-#include "base/logging.h"   // for perftools_vsnprintf
-#include "base/spinlock.h"              // for SpinLockHolder, SpinLock
-
-// Variables for storing crash output.  Allocated statically since we
-// may not be able to heap-allocate while crashing.
-static SpinLock crash_lock(base::LINKER_INITIALIZED);
-static bool crashed = false;
-static const int kStatsBufferSize = 16 << 10;
-static char stats_buffer[kStatsBufferSize] = { 0 };
-
-namespace tcmalloc {
-
-static void WriteMessage(const char* msg, int length) {
-  write(STDERR_FILENO, msg, length);
-}
-
-void (*log_message_writer)(const char* msg, int length) = WriteMessage;
-
-
-class Logger {
- public:
-  bool Add(const LogItem& item);
-  bool AddStr(const char* str, int n);
-  bool AddNum(uint64_t num, int base);  // base must be 10 or 16.
-
-  static const int kBufSize = 200;
-  char* p_;
-  char* end_;
-  char buf_[kBufSize];
-};
-
-void Log(LogMode mode, const char* filename, int line,
-         LogItem a, LogItem b, LogItem c, LogItem d) {
-  Logger state;
-  state.p_ = state.buf_;
-  state.end_ = state.buf_ + sizeof(state.buf_);
-  state.AddStr(filename, strlen(filename))
-      && state.AddStr(":", 1)
-      && state.AddNum(line, 10)
-      && state.AddStr("]", 1)
-      && state.Add(a)
-      && state.Add(b)
-      && state.Add(c)
-      && state.Add(d);
-
-  // Teminate with newline
-  if (state.p_ >= state.end_) {
-    state.p_ = state.end_ - 1;
-  }
-  *state.p_ = '\n';
-  state.p_++;
-
-  int msglen = state.p_ - state.buf_;
-  if (mode == kLog) {
-    (*log_message_writer)(state.buf_, msglen);
-    return;
-  }
-
-  bool first_crash = false;
-  {
-    SpinLockHolder l(&crash_lock);
-    if (!crashed) {
-      crashed = true;
-      first_crash = true;
-    }
-  }
-
-  (*log_message_writer)(state.buf_, msglen);
-  if (first_crash && mode == kCrashWithStats) {
-    MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize);
-    (*log_message_writer)(stats_buffer, strlen(stats_buffer));
-  }
-
-  abort();
-}
-
-bool Logger::Add(const LogItem& item) {
-  // Separate items with spaces
-  if (p_ < end_) {
-    *p_ = ' ';
-    p_++;
-  }
-
-  switch (item.tag_) {
-    case LogItem::kStr:
-      return AddStr(item.u_.str, strlen(item.u_.str));
-    case LogItem::kUnsigned:
-      return AddNum(item.u_.unum, 10);
-    case LogItem::kSigned:
-      if (item.u_.snum < 0) {
-        // The cast to uint64_t is intentionally before the negation
-        // so that we do not attempt to negate -2^63.
-        return AddStr("-", 1)
-            && AddNum(- static_cast<uint64_t>(item.u_.snum), 10);
-      } else {
-        return AddNum(static_cast<uint64_t>(item.u_.snum), 10);
-      }
-    case LogItem::kPtr:
-      return AddStr("0x", 2)
-          && AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16);
-    default:
-      return false;
-  }
-}
-
-bool Logger::AddStr(const char* str, int n) {
-  if (end_ - p_ < n) {
-    return false;
-  } else {
-    memcpy(p_, str, n);
-    p_ += n;
-    return true;
-  }
-}
-
-bool Logger::AddNum(uint64_t num, int base) {
-  static const char kDigits[] = "0123456789abcdef";
-  char space[22];  // more than enough for 2^64 in smallest supported base (10)
-  char* end = space + sizeof(space);
-  char* pos = end;
-  do {
-    pos--;
-    *pos = kDigits[num % base];
-    num /= base;
-  } while (num > 0 && pos > space);
-  return AddStr(pos, end - pos);
-}
-
-}  // end tcmalloc namespace
-
-void TCMalloc_Printer::printf(const char* format, ...) {
-  if (left_ > 0) {
-    va_list ap;
-    va_start(ap, format);
-    const int r = perftools_vsnprintf(buf_, left_, format, ap);
-    va_end(ap);
-    if (r < 0) {
-      // Perhaps an old glibc that returns -1 on truncation?
-      left_ = 0;
-    } else if (r > left_) {
-      // Truncation
-      left_ = 0;
-    } else {
-      left_ -= r;
-      buf_ += r;
-    }
-  }
-}
diff --git a/contrib/libtcmalloc/src/internal_logging.h b/contrib/libtcmalloc/src/internal_logging.h
deleted file mode 100644
index c6363894911..00000000000
--- a/contrib/libtcmalloc/src/internal_logging.h
+++ /dev/null
@@ -1,144 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Internal logging and related utility routines.
-
-#ifndef TCMALLOC_INTERNAL_LOGGING_H_
-#define TCMALLOC_INTERNAL_LOGGING_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-
-//-------------------------------------------------------------------
-// Utility routines
-//-------------------------------------------------------------------
-
-// Safe logging helper: we write directly to the stderr file
-// descriptor and avoid FILE buffering because that may invoke
-// malloc().
-//
-// Example:
-//   Log(kLog, __FILE__, __LINE__, "error", bytes);
-
-namespace tcmalloc {
-enum LogMode {
-  kLog,                       // Just print the message
-  kCrash,                     // Print the message and crash
-  kCrashWithStats             // Print the message, some stats, and crash
-};
-
-class Logger;
-
-// A LogItem holds any of the argument types that can be passed to Log()
-class LogItem {
- public:
-  LogItem()                     : tag_(kEnd)      { }
-  LogItem(const char* v)        : tag_(kStr)      { u_.str = v; }
-  LogItem(int v)                : tag_(kSigned)   { u_.snum = v; }
-  LogItem(long v)               : tag_(kSigned)   { u_.snum = v; }
-  LogItem(long long v)          : tag_(kSigned)   { u_.snum = v; }
-  LogItem(unsigned int v)       : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(unsigned long v)      : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; }
-  LogItem(const void* v)        : tag_(kPtr)      { u_.ptr = v; }
- private:
-  friend class Logger;
-  enum Tag {
-    kStr,
-    kSigned,
-    kUnsigned,
-    kPtr,
-    kEnd
-  };
-  Tag tag_;
-  union {
-    const char* str;
-    const void* ptr;
-    int64_t snum;
-    uint64_t unum;
-  } u_;
-};
-
-extern PERFTOOLS_DLL_DECL void Log(LogMode mode, const char* filename, int line,
-                LogItem a, LogItem b = LogItem(),
-                LogItem c = LogItem(), LogItem d = LogItem());
-
-// Tests can override this function to collect logging messages.
-extern PERFTOOLS_DLL_DECL void (*log_message_writer)(const char* msg, int length);
-
-}  // end tcmalloc namespace
-
-// Like assert(), but executed even in NDEBUG mode
-#undef CHECK_CONDITION
-#define CHECK_CONDITION(cond)                                            \
-do {                                                                     \
-  if (!(cond)) {                                                         \
-    ::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond);      \
-  }                                                                      \
-} while (0)
-
-// Our own version of assert() so we can avoid hanging by trying to do
-// all kinds of goofy printing while holding the malloc lock.
-#ifndef NDEBUG
-#define ASSERT(cond) CHECK_CONDITION(cond)
-#else
-#define ASSERT(cond) ((void) 0)
-#endif
-
-// Print into buffer
-class TCMalloc_Printer {
- private:
-  char* buf_;           // Where should we write next
-  int   left_;          // Space left in buffer (including space for \0)
-
- public:
-  // REQUIRES: "length > 0"
-  TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) {
-    buf[0] = '\0';
-  }
-
-  void printf(const char* format, ...)
-#ifdef HAVE___ATTRIBUTE__
-    __attribute__ ((__format__ (__printf__, 2, 3)))
-#endif
-;
-};
-
-#endif  // TCMALLOC_INTERNAL_LOGGING_H_
diff --git a/contrib/libtcmalloc/src/libc_override.h b/contrib/libtcmalloc/src/libc_override.h
deleted file mode 100644
index 0dbabb2d169..00000000000
--- a/contrib/libtcmalloc/src/libc_override.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// This .h file imports the code that causes tcmalloc to override libc
-// versions of malloc/free/new/delete/etc.  That is, it provides the
-// logic that makes it so calls to malloc(10) go through tcmalloc,
-// rather than the default (libc) malloc.
-//
-// This file also provides a method: ReplaceSystemAlloc(), that every
-// libc_override_*.h file it #includes is required to provide.  This
-// is called when first setting up tcmalloc -- that is, when a global
-// constructor in tcmalloc.cc is executed -- to do any initialization
-// work that may be required for this OS.  (Note we cannot entirely
-// control when tcmalloc is initialized, and the system may do some
-// mallocs and frees before this routine is called.)  It may be a
-// noop.
-//
-// Every libc has its own way of doing this, and sometimes the compiler
-// matters too, so we have a different file for each libc, and often
-// for different compilers and OS's.
-
-#ifndef TCMALLOC_LIBC_OVERRIDE_INL_H_
-#define TCMALLOC_LIBC_OVERRIDE_INL_H_
-
-#include "config.h"
-#ifdef HAVE_FEATURES_H
-#include <features.h>   // for __GLIBC__
-#endif
-#include <gperftools/tcmalloc.h>
-
-static void ReplaceSystemAlloc();  // defined in the .h files below
-
-// For windows, there are two ways to get tcmalloc.  If we're
-// patching, then src/windows/patch_function.cc will do the necessary
-// overriding here.  Otherwise, we doing the 'redefine' trick, where
-// we remove malloc/new/etc from mscvcrt.dll, and just need to define
-// them now.
-#if defined(_WIN32) && defined(WIN32_DO_PATCHING)
-void PatchWindowsFunctions();   // in src/windows/patch_function.cc
-static void ReplaceSystemAlloc() { PatchWindowsFunctions(); }
-
-#elif defined(_WIN32) && !defined(WIN32_DO_PATCHING)
-#include "libc_override_redefine.h"
-
-#elif defined(__APPLE__)
-#include "libc_override_osx.h"
-
-#elif defined(__GLIBC__)
-#include "libc_override_glibc.h"
-
-// Not all gcc systems necessarily support weak symbols, but all the
-// ones I know of do, so for now just assume they all do.
-#elif defined(__GNUC__)
-#include "libc_override_gcc_and_weak.h"
-
-#else
-#error Need to add support for your libc/OS here
-
-#endif
-
-#endif  // TCMALLOC_LIBC_OVERRIDE_INL_H_
diff --git a/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h b/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h
deleted file mode 100644
index 323d615d0ac..00000000000
--- a/contrib/libtcmalloc/src/libc_override_gcc_and_weak.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// Used to override malloc routines on systems that define the
-// memory allocation routines to be weak symbols in their libc
-// (almost all unix-based systems are like this), on gcc, which
-// suppports the 'alias' attribute.
-
-#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
-#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
-
-#ifdef HAVE_SYS_CDEFS_H
-#include <sys/cdefs.h>    // for __THROW
-#endif
-#include <gperftools/tcmalloc.h>
-
-#include "getenv_safe.h" // TCMallocGetenvSafe
-#include "base/commandlineflags.h"
-
-#ifndef __THROW    // I guess we're not on a glibc-like system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-#ifndef __GNUC__
-# error libc_override_gcc_and_weak.h is for gcc distributions only.
-#endif
-
-#define ALIAS(tc_fn)   __attribute__ ((alias (#tc_fn), used))
-
-void* operator new(size_t size)
-    ALIAS(tc_new);
-void operator delete(void* p) noexcept
-    ALIAS(tc_delete);
-void* operator new[](size_t size)
-    ALIAS(tc_newarray);
-void operator delete[](void* p) noexcept
-    ALIAS(tc_deletearray);
-void* operator new(size_t size, const std::nothrow_t& nt) noexcept
-    ALIAS(tc_new_nothrow);
-void* operator new[](size_t size, const std::nothrow_t& nt) noexcept
-    ALIAS(tc_newarray_nothrow);
-void operator delete(void* p, const std::nothrow_t& nt) noexcept
-    ALIAS(tc_delete_nothrow);
-void operator delete[](void* p, const std::nothrow_t& nt) noexcept
-    ALIAS(tc_deletearray_nothrow);
-
-#if defined(ENABLE_SIZED_DELETE)
-
-void operator delete(void *p, size_t size) throw()
-    ALIAS(tc_delete_sized);
-void operator delete[](void *p, size_t size) throw()
-    ALIAS(tc_deletearray_sized);
-
-#elif defined(ENABLE_DYNAMIC_SIZED_DELETE) && \
-  (__GNUC__ * 100 + __GNUC_MINOR__) >= 405
-
-static void delegate_sized_delete(void *p, size_t s) throw() {
-  (operator delete)(p);
-}
-
-static void delegate_sized_deletearray(void *p, size_t s) throw() {
-  (operator delete[])(p);
-}
-
-extern "C" __attribute__((weak))
-int tcmalloc_sized_delete_enabled(void);
-
-static bool sized_delete_enabled(void) {
-  if (tcmalloc_sized_delete_enabled != 0) {
-    return !!tcmalloc_sized_delete_enabled();
-  }
-
-  const char *flag = TCMallocGetenvSafe("TCMALLOC_ENABLE_SIZED_DELETE");
-  return tcmalloc::commandlineflags::StringToBool(flag, false);
-}
-
-extern "C" {
-
-static void *resolve_delete_sized(void) {
-  if (sized_delete_enabled()) {
-    return reinterpret_cast<void *>(tc_delete_sized);
-  }
-  return reinterpret_cast<void *>(delegate_sized_delete);
-}
-
-static void *resolve_deletearray_sized(void) {
-  if (sized_delete_enabled()) {
-    return reinterpret_cast<void *>(tc_deletearray_sized);
-  }
-  return reinterpret_cast<void *>(delegate_sized_deletearray);
-}
-
-}
-
-void operator delete(void *p, size_t size) throw()
-  __attribute__((ifunc("resolve_delete_sized")));
-void operator delete[](void *p, size_t size) throw()
-  __attribute__((ifunc("resolve_deletearray_sized")));
-
-#else /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */
-
-void operator delete(void *p, size_t size) throw()
-  ALIAS(tc_delete);
-void operator delete[](void *p, size_t size) throw()
-  ALIAS(tc_deletearray);
-
-#endif /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */
-
-extern "C" {
-  void* malloc(size_t size) __THROW               ALIAS(tc_malloc);
-  void free(void* ptr) __THROW                    ALIAS(tc_free);
-  void* realloc(void* ptr, size_t size) __THROW   ALIAS(tc_realloc);
-  void* calloc(size_t n, size_t size) __THROW     ALIAS(tc_calloc);
-  void cfree(void* ptr) __THROW                   ALIAS(tc_cfree);
-  void* memalign(size_t align, size_t s) __THROW  ALIAS(tc_memalign);
-  void* valloc(size_t size) __THROW               ALIAS(tc_valloc);
-  void* pvalloc(size_t size) __THROW              ALIAS(tc_pvalloc);
-  int posix_memalign(void** r, size_t a, size_t s) __THROW
-      ALIAS(tc_posix_memalign);
-#ifndef __UCLIBC__
-  void malloc_stats(void) __THROW                 ALIAS(tc_malloc_stats);
-#endif
-  int mallopt(int cmd, int value) __THROW         ALIAS(tc_mallopt);
-#ifdef HAVE_STRUCT_MALLINFO
-  struct mallinfo mallinfo(void) __THROW          ALIAS(tc_mallinfo);
-#endif
-  size_t malloc_size(void* p) __THROW             ALIAS(tc_malloc_size);
-#if defined(__ANDROID__)
-  size_t malloc_usable_size(const void* p) __THROW
-         ALIAS(tc_malloc_size);
-#else
-  size_t malloc_usable_size(void* p) __THROW      ALIAS(tc_malloc_size);
-#endif
-}   // extern "C"
-
-#undef ALIAS
-
-// No need to do anything at tcmalloc-registration time: we do it all
-// via overriding weak symbols (at link time).
-static void ReplaceSystemAlloc() { }
-
-#endif  // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
diff --git a/contrib/libtcmalloc/src/libc_override_glibc.h b/contrib/libtcmalloc/src/libc_override_glibc.h
deleted file mode 100644
index cc17df315c0..00000000000
--- a/contrib/libtcmalloc/src/libc_override_glibc.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// Used to override malloc routines on systems that are using glibc.
-
-#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
-#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
-
-#include "config.h"
-#include <features.h>     // for __GLIBC__
-#include <gperftools/tcmalloc.h>
-
-#ifndef __GLIBC__
-# error libc_override_glibc.h is for glibc distributions only.
-#endif
-
-// In glibc, the memory-allocation methods are weak symbols, so we can
-// just override them with our own.  If we're using gcc, we can use
-// __attribute__((alias)) to do the overriding easily (exception:
-// Mach-O, which doesn't support aliases).  Otherwise we have to use a
-// function call.
-#if !defined(__GNUC__) || defined(__MACH__)
-
-// This also defines ReplaceSystemAlloc().
-# include "libc_override_redefine.h"  // defines functions malloc()/etc
-
-#else  // #if !defined(__GNUC__) || defined(__MACH__)
-
-// If we get here, we're a gcc system, so do all the overriding we do
-// with gcc.  This does the overriding of all the 'normal' memory
-// allocation.  This also defines ReplaceSystemAlloc().
-# include "libc_override_gcc_and_weak.h"
-
-// We also have to do some glibc-specific overriding.  Some library
-// routines on RedHat 9 allocate memory using malloc() and free it
-// using __libc_free() (or vice-versa).  Since we provide our own
-// implementations of malloc/free, we need to make sure that the
-// __libc_XXX variants (defined as part of glibc) also point to the
-// same implementations.  Since it only matters for redhat, we
-// do it inside the gcc #ifdef, since redhat uses gcc.
-// TODO(csilvers): only do this if we detect we're an old enough glibc?
-
-#define ALIAS(tc_fn)   __attribute__ ((alias (#tc_fn)))
-extern "C" {
-  void* __libc_malloc(size_t size)                ALIAS(tc_malloc);
-  void __libc_free(void* ptr)                     ALIAS(tc_free);
-  void* __libc_realloc(void* ptr, size_t size)    ALIAS(tc_realloc);
-  void* __libc_calloc(size_t n, size_t size)      ALIAS(tc_calloc);
-  void __libc_cfree(void* ptr)                    ALIAS(tc_cfree);
-  void* __libc_memalign(size_t align, size_t s)   ALIAS(tc_memalign);
-  void* __libc_valloc(size_t size)                ALIAS(tc_valloc);
-  void* __libc_pvalloc(size_t size)               ALIAS(tc_pvalloc);
-  int __posix_memalign(void** r, size_t a, size_t s)  ALIAS(tc_posix_memalign);
-}   // extern "C"
-#undef ALIAS
-
-#endif  // #if defined(__GNUC__) && !defined(__MACH__)
-
-// No need to write ReplaceSystemAlloc(); one of the #includes above
-// did it for us.
-
-#endif  // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
diff --git a/contrib/libtcmalloc/src/libc_override_osx.h b/contrib/libtcmalloc/src/libc_override_osx.h
deleted file mode 100644
index afd57d1560a..00000000000
--- a/contrib/libtcmalloc/src/libc_override_osx.h
+++ /dev/null
@@ -1,308 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// Used to override malloc routines on OS X systems.  We use the
-// malloc-zone functionality built into OS X to register our malloc
-// routine.
-//
-// 1) We used to use the normal 'override weak libc malloc/etc'
-// technique for OS X.  This is not optimal because mach does not
-// support the 'alias' attribute, so we had to have forwarding
-// functions.  It also does not work very well with OS X shared
-// libraries (dylibs) -- in general, the shared libs don't use
-// tcmalloc unless run with the DYLD_FORCE_FLAT_NAMESPACE envvar.
-//
-// 2) Another approach would be to use an interposition array:
-//      static const interpose_t interposers[] __attribute__((section("__DATA, __interpose"))) = {
-//        { (void *)tc_malloc, (void *)malloc },
-//        { (void *)tc_free, (void *)free },
-//      };
-// This requires the user to set the DYLD_INSERT_LIBRARIES envvar, so
-// is not much better.
-//
-// 3) Registering a new malloc zone avoids all these issues:
-//  http://www.opensource.apple.com/source/Libc/Libc-583/include/malloc/malloc.h
-//  http://www.opensource.apple.com/source/Libc/Libc-583/gen/malloc.c
-// If we make tcmalloc the default malloc zone (undocumented but
-// possible) then all new allocs use it, even those in shared
-// libraries.  Allocs done before tcmalloc was installed, or in libs
-// that aren't using tcmalloc for some reason, will correctly go
-// through the malloc-zone interface when free-ing, and will pick up
-// the libc free rather than tcmalloc free.  So it should "never"
-// cause a crash (famous last words).
-//
-// 4) The routines one must define for one's own malloc have changed
-// between OS X versions.  This requires some hoops on our part, but
-// is only really annoying when it comes to posix_memalign.  The right
-// behavior there depends on what OS version tcmalloc was compiled on,
-// but also what OS version the program is running on.  For now, we
-// punt and don't implement our own posix_memalign.  Apps that really
-// care can use tc_posix_memalign directly.
-
-#ifndef TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_
-#define TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_
-
-#include "config.h"
-#ifdef HAVE_FEATURES_H
-#include <features.h>
-#endif
-#include <gperftools/tcmalloc.h>
-
-#if !defined(__APPLE__)
-# error libc_override_glibc-osx.h is for OS X distributions only.
-#endif
-
-#include <AvailabilityMacros.h>
-#include <malloc/malloc.h>
-
-namespace tcmalloc {
-  void CentralCacheLockAll();
-  void CentralCacheUnlockAll();
-}
-
-// from AvailabilityMacros.h
-#if defined(MAC_OS_X_VERSION_10_6) && \
-    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
-extern "C" {
-  // This function is only available on 10.6 (and later) but the
-  // LibSystem headers do not use AvailabilityMacros.h to handle weak
-  // importing automatically.  This prototype is a copy of the one in
-  // <malloc/malloc.h> with the WEAK_IMPORT_ATTRBIUTE added.
-  extern malloc_zone_t *malloc_default_purgeable_zone(void)
-      WEAK_IMPORT_ATTRIBUTE;
-}
-#endif
-
-// We need to provide wrappers around all the libc functions.
-namespace {
-size_t mz_size(malloc_zone_t* zone, const void* ptr) {
-  if (MallocExtension::instance()->GetOwnership(ptr) != MallocExtension::kOwned)
-    return 0;  // malloc_zone semantics: return 0 if we don't own the memory
-
-  // TODO(csilvers): change this method to take a const void*, one day.
-  return MallocExtension::instance()->GetAllocatedSize(const_cast<void*>(ptr));
-}
-
-void* mz_malloc(malloc_zone_t* zone, size_t size) {
-  return tc_malloc(size);
-}
-
-void* mz_calloc(malloc_zone_t* zone, size_t num_items, size_t size) {
-  return tc_calloc(num_items, size);
-}
-
-void* mz_valloc(malloc_zone_t* zone, size_t size) {
-  return tc_valloc(size);
-}
-
-void mz_free(malloc_zone_t* zone, void* ptr) {
-  return tc_free(ptr);
-}
-
-void* mz_realloc(malloc_zone_t* zone, void* ptr, size_t size) {
-  return tc_realloc(ptr, size);
-}
-
-void* mz_memalign(malloc_zone_t* zone, size_t align, size_t size) {
-  return tc_memalign(align, size);
-}
-
-void mz_destroy(malloc_zone_t* zone) {
-  // A no-op -- we will not be destroyed!
-}
-
-// malloc_introspection callbacks.  I'm not clear on what all of these do.
-kern_return_t mi_enumerator(task_t task, void *,
-                            unsigned type_mask, vm_address_t zone_address,
-                            memory_reader_t reader,
-                            vm_range_recorder_t recorder) {
-  // Should enumerate all the pointers we have.  Seems like a lot of work.
-  return KERN_FAILURE;
-}
-
-size_t mi_good_size(malloc_zone_t *zone, size_t size) {
-  // I think it's always safe to return size, but we maybe could do better.
-  return size;
-}
-
-boolean_t mi_check(malloc_zone_t *zone) {
-  return MallocExtension::instance()->VerifyAllMemory();
-}
-
-void mi_print(malloc_zone_t *zone, boolean_t verbose) {
-  int bufsize = 8192;
-  if (verbose)
-    bufsize = 102400;   // I picked this size arbitrarily
-  char* buffer = new char[bufsize];
-  MallocExtension::instance()->GetStats(buffer, bufsize);
-  fprintf(stdout, "%s", buffer);
-  delete[] buffer;
-}
-
-void mi_log(malloc_zone_t *zone, void *address) {
-  // I don't think we support anything like this
-}
-
-void mi_force_lock(malloc_zone_t *zone) {
-  tcmalloc::CentralCacheLockAll();
-}
-
-void mi_force_unlock(malloc_zone_t *zone) {
-  tcmalloc::CentralCacheUnlockAll();
-}
-
-void mi_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) {
-  // TODO(csilvers): figure out how to fill these out
-  stats->blocks_in_use = 0;
-  stats->size_in_use = 0;
-  stats->max_size_in_use = 0;
-  stats->size_allocated = 0;
-}
-
-boolean_t mi_zone_locked(malloc_zone_t *zone) {
-  return false;  // Hopefully unneeded by us!
-}
-
-}  // unnamed namespace
-
-// OS X doesn't have pvalloc, cfree, malloc_statc, etc, so we can just
-// define our own. :-)  OS X supplies posix_memalign in some versions
-// but not others, either strongly or weakly linked, in a way that's
-// difficult enough to code to correctly, that I just don't try to
-// support either memalign() or posix_memalign().  If you need them
-// and are willing to code to tcmalloc, you can use tc_posix_memalign().
-extern "C" {
-  void  cfree(void* p)                   { tc_cfree(p);               }
-  void* pvalloc(size_t s)                { return tc_pvalloc(s);      }
-  void malloc_stats(void)                { tc_malloc_stats();         }
-  int mallopt(int cmd, int v)            { return tc_mallopt(cmd, v); }
-  // No struct mallinfo on OS X, so don't define mallinfo().
-  // An alias for malloc_size(), which OS X defines.
-  size_t malloc_usable_size(void* p)     { return tc_malloc_size(p); }
-}  // extern "C"
-
-static malloc_zone_t *get_default_zone() {
-   malloc_zone_t **zones = NULL;
-   unsigned int num_zones = 0;
-
-   /*
-    * On OSX 10.12, malloc_default_zone returns a special zone that is not
-    * present in the list of registered zones. That zone uses a "lite zone"
-    * if one is present (apparently enabled when malloc stack logging is
-    * enabled), or the first registered zone otherwise. In practice this
-    * means unless malloc stack logging is enabled, the first registered
-    * zone is the default.
-    * So get the list of zones to get the first one, instead of relying on
-    * malloc_default_zone.
-    */
-   if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones,
-                                            &num_zones)) {
-       /* Reset the value in case the failure happened after it was set. */
-       num_zones = 0;
-   }
-
-   if (num_zones)
-     return zones[0];
-
-   return malloc_default_zone();
-}
-
-
-static void ReplaceSystemAlloc() {
-  static malloc_introspection_t tcmalloc_introspection;
-  memset(&tcmalloc_introspection, 0, sizeof(tcmalloc_introspection));
-
-  tcmalloc_introspection.enumerator = &mi_enumerator;
-  tcmalloc_introspection.good_size = &mi_good_size;
-  tcmalloc_introspection.check = &mi_check;
-  tcmalloc_introspection.print = &mi_print;
-  tcmalloc_introspection.log = &mi_log;
-  tcmalloc_introspection.force_lock = &mi_force_lock;
-  tcmalloc_introspection.force_unlock = &mi_force_unlock;
-
-  static malloc_zone_t tcmalloc_zone;
-  memset(&tcmalloc_zone, 0, sizeof(malloc_zone_t));
-
-  // Start with a version 4 zone which is used for OS X 10.4 and 10.5.
-  tcmalloc_zone.version = 4;
-  tcmalloc_zone.zone_name = "tcmalloc";
-  tcmalloc_zone.size = &mz_size;
-  tcmalloc_zone.malloc = &mz_malloc;
-  tcmalloc_zone.calloc = &mz_calloc;
-  tcmalloc_zone.valloc = &mz_valloc;
-  tcmalloc_zone.free = &mz_free;
-  tcmalloc_zone.realloc = &mz_realloc;
-  tcmalloc_zone.destroy = &mz_destroy;
-  tcmalloc_zone.batch_malloc = NULL;
-  tcmalloc_zone.batch_free = NULL;
-  tcmalloc_zone.introspect = &tcmalloc_introspection;
-
-  // from AvailabilityMacros.h
-#if defined(MAC_OS_X_VERSION_10_6) && \
-    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
-  // Switch to version 6 on OSX 10.6 to support memalign.
-  tcmalloc_zone.version = 6;
-  tcmalloc_zone.free_definite_size = NULL;
-  tcmalloc_zone.memalign = &mz_memalign;
-  tcmalloc_introspection.zone_locked = &mi_zone_locked;
-
-  // Request the default purgable zone to force its creation. The
-  // current default zone is registered with the purgable zone for
-  // doing tiny and small allocs.  Sadly, it assumes that the default
-  // zone is the szone implementation from OS X and will crash if it
-  // isn't.  By creating the zone now, this will be true and changing
-  // the default zone won't cause a problem.  This only needs to
-  // happen when actually running on OS X 10.6 and higher (note the
-  // ifdef above only checks if we were *compiled* with 10.6 or
-  // higher; at runtime we have to check if this symbol is defined.)
-  if (malloc_default_purgeable_zone) {
-    malloc_default_purgeable_zone();
-  }
-#endif
-
-  // Register the tcmalloc zone. At this point, it will not be the
-  // default zone.
-  malloc_zone_register(&tcmalloc_zone);
-
-  // Unregister and reregister the default zone.  Unregistering swaps
-  // the specified zone with the last one registered which for the
-  // default zone makes the more recently registered zone the default
-  // zone.  The default zone is then re-registered to ensure that
-  // allocations made from it earlier will be handled correctly.
-  // Things are not guaranteed to work that way, but it's how they work now.
-  malloc_zone_t *default_zone = get_default_zone();
-  malloc_zone_unregister(default_zone);
-  malloc_zone_register(default_zone);
-}
-
-#endif  // TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_
diff --git a/contrib/libtcmalloc/src/libc_override_redefine.h b/contrib/libtcmalloc/src/libc_override_redefine.h
deleted file mode 100644
index 72679ef38b8..00000000000
--- a/contrib/libtcmalloc/src/libc_override_redefine.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// Used on systems that don't have their own definition of
-// malloc/new/etc.  (Typically this will be a windows msvcrt.dll that
-// has been edited to remove the definitions.)  We can just define our
-// own as normal functions.
-//
-// This should also work on systems were all the malloc routines are
-// defined as weak symbols, and there's no support for aliasing.
-
-#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
-#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
-
-void* operator new(size_t size)                  { return tc_new(size);       }
-void operator delete(void* p) throw()            { tc_delete(p);              }
-void* operator new[](size_t size)                { return tc_newarray(size);  }
-void operator delete[](void* p) throw()          { tc_deletearray(p);         }
-void* operator new(size_t size, const std::nothrow_t& nt) throw() {
-  return tc_new_nothrow(size, nt);
-}
-void* operator new[](size_t size, const std::nothrow_t& nt) throw() {
-  return tc_newarray_nothrow(size, nt);
-}
-void operator delete(void* ptr, const std::nothrow_t& nt) throw() {
-  return tc_delete_nothrow(ptr, nt);
-}
-void operator delete[](void* ptr, const std::nothrow_t& nt) throw() {
-  return tc_deletearray_nothrow(ptr, nt);
-}
-
-#ifdef ENABLE_SIZED_DELETE
-void operator delete(void* p, size_t s) throw()  { tc_delete_sized(p, s);     }
-void operator delete[](void* p, size_t s) throw(){ tc_deletearray_sized(p);   }
-#endif
-
-extern "C" {
-  void* malloc(size_t s)                         { return tc_malloc(s);       }
-  void  free(void* p)                            { tc_free(p);                }
-  void* realloc(void* p, size_t s)               { return tc_realloc(p, s);   }
-  void* calloc(size_t n, size_t s)               { return tc_calloc(n, s);    }
-  void  cfree(void* p)                           { tc_cfree(p);               }
-  void* memalign(size_t a, size_t s)             { return tc_memalign(a, s);  }
-  void* valloc(size_t s)                         { return tc_valloc(s);       }
-  void* pvalloc(size_t s)                        { return tc_pvalloc(s);      }
-  int posix_memalign(void** r, size_t a, size_t s)         {
-    return tc_posix_memalign(r, a, s);
-  }
-  void malloc_stats(void)                        { tc_malloc_stats();         }
-  int mallopt(int cmd, int v)                    { return tc_mallopt(cmd, v); }
-#ifdef HAVE_STRUCT_MALLINFO
-  struct mallinfo mallinfo(void)                 { return tc_mallinfo();      }
-#endif
-  size_t malloc_size(void* p)                    { return tc_malloc_size(p); }
-  size_t malloc_usable_size(void* p)             { return tc_malloc_size(p); }
-}  // extern "C"
-
-// No need to do anything at tcmalloc-registration time: we do it all
-// via overriding weak symbols (at link time).
-static void ReplaceSystemAlloc() { }
-
-#endif  // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
diff --git a/contrib/libtcmalloc/src/linked_list.h b/contrib/libtcmalloc/src/linked_list.h
deleted file mode 100644
index 66a07410760..00000000000
--- a/contrib/libtcmalloc/src/linked_list.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Some very basic linked list functions for dealing with using void * as
-// storage.
-
-#ifndef TCMALLOC_LINKED_LIST_H_
-#define TCMALLOC_LINKED_LIST_H_
-
-#include <stddef.h>
-
-namespace tcmalloc {
-
-inline void *SLL_Next(void *t) {
-  return *(reinterpret_cast<void**>(t));
-}
-
-inline void SLL_SetNext(void *t, void *n) {
-  *(reinterpret_cast<void**>(t)) = n;
-}
-
-inline void SLL_Push(void **list, void *element) {
-  SLL_SetNext(element, *list);
-  *list = element;
-}
-
-inline void *SLL_Pop(void **list) {
-  void *result = *list;
-  *list = SLL_Next(*list);
-  return result;
-}
-
-// Remove N elements from a linked list to which head points.  head will be
-// modified to point to the new head.  start and end will point to the first
-// and last nodes of the range.  Note that end will point to NULL after this
-// function is called.
-inline void SLL_PopRange(void **head, int N, void **start, void **end) {
-  if (N == 0) {
-    *start = NULL;
-    *end = NULL;
-    return;
-  }
-
-  void *tmp = *head;
-  for (int i = 1; i < N; ++i) {
-    tmp = SLL_Next(tmp);
-  }
-
-  *start = *head;
-  *end = tmp;
-  *head = SLL_Next(tmp);
-  // Unlink range from list.
-  SLL_SetNext(tmp, NULL);
-}
-
-inline void SLL_PushRange(void **head, void *start, void *end) {
-  if (!start) return;
-  SLL_SetNext(end, *head);
-  *head = start;
-}
-
-inline size_t SLL_Size(void *head) {
-  int count = 0;
-  while (head) {
-    count++;
-    head = SLL_Next(head);
-  }
-  return count;
-}
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_LINKED_LIST_H_
diff --git a/contrib/libtcmalloc/src/malloc_extension.cc b/contrib/libtcmalloc/src/malloc_extension.cc
deleted file mode 100644
index 13a06f4dbe8..00000000000
--- a/contrib/libtcmalloc/src/malloc_extension.cc
+++ /dev/null
@@ -1,388 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-#include <assert.h>
-#include <string.h>
-#include <stdio.h>
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-#include <string>
-#include "base/dynamic_annotations.h"
-#include "base/sysinfo.h"    // for FillProcSelfMaps
-#ifndef NO_HEAP_CHECK
-#include "gperftools/heap-checker.h"
-#endif
-#include "gperftools/malloc_extension.h"
-#include "gperftools/malloc_extension_c.h"
-#include "maybe_threads.h"
-#include "base/googleinit.h"
-
-using STL_NAMESPACE::string;
-using STL_NAMESPACE::vector;
-
-static void DumpAddressMap(string* result) {
-  *result += "\nMAPPED_LIBRARIES:\n";
-  // We keep doubling until we get a fit
-  const size_t old_resultlen = result->size();
-  for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) {
-    result->resize(old_resultlen + amap_size);
-    bool wrote_all = false;
-    const int bytes_written =
-        tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size,
-                                   &wrote_all);
-    if (wrote_all) {   // we fit!
-      (*result)[old_resultlen + bytes_written] = '\0';
-      result->resize(old_resultlen + bytes_written);
-      return;
-    }
-  }
-  result->reserve(old_resultlen);   // just don't print anything
-}
-
-// Note: this routine is meant to be called before threads are spawned.
-void MallocExtension::Initialize() {
-  static bool initialize_called = false;
-
-  if (initialize_called) return;
-  initialize_called = true;
-
-#ifdef __GLIBC__
-  // GNU libc++ versions 3.3 and 3.4 obey the environment variables
-  // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively.  Setting
-  // one of these variables forces the STL default allocator to call
-  // new() or delete() for each allocation or deletion.  Otherwise
-  // the STL allocator tries to avoid the high cost of doing
-  // allocations by pooling memory internally.  However, tcmalloc
-  // does allocations really fast, especially for the types of small
-  // items one sees in STL, so it's better off just using us.
-  // TODO: control whether we do this via an environment variable?
-  setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
-  setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);
-
-  // Now we need to make the setenv 'stick', which it may not do since
-  // the env is flakey before main() is called.  But luckily stl only
-  // looks at this env var the first time it tries to do an alloc, and
-  // caches what it finds.  So we just cause an stl alloc here.
-  string dummy("I need to be allocated");
-  dummy += "!";         // so the definition of dummy isn't optimized out
-#endif  /* __GLIBC__ */
-}
-
-// SysAllocator implementation
-SysAllocator::~SysAllocator() {}
-
-// Default implementation -- does nothing
-MallocExtension::~MallocExtension() { }
-bool MallocExtension::VerifyAllMemory() { return true; }
-bool MallocExtension::VerifyNewMemory(const void* p) { return true; }
-bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; }
-bool MallocExtension::VerifyMallocMemory(const void* p) { return true; }
-
-bool MallocExtension::GetNumericProperty(const char* property, size_t* value) {
-  return false;
-}
-
-bool MallocExtension::SetNumericProperty(const char* property, size_t value) {
-  return false;
-}
-
-void MallocExtension::GetStats(char* buffer, int length) {
-  assert(length > 0);
-  buffer[0] = '\0';
-}
-
-bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total,
-                                       int histogram[kMallocHistogramSize]) {
-  *blocks = 0;
-  *total = 0;
-  memset(histogram, 0, sizeof(*histogram) * kMallocHistogramSize);
-  return true;
-}
-
-void** MallocExtension::ReadStackTraces(int* sample_period) {
-  return NULL;
-}
-
-void** MallocExtension::ReadHeapGrowthStackTraces() {
-  return NULL;
-}
-
-void MallocExtension::MarkThreadIdle() {
-  // Default implementation does nothing
-}
-
-void MallocExtension::MarkThreadBusy() {
-  // Default implementation does nothing
-}
-
-SysAllocator* MallocExtension::GetSystemAllocator() {
-  return NULL;
-}
-
-void MallocExtension::SetSystemAllocator(SysAllocator *a) {
-  // Default implementation does nothing
-}
-
-void MallocExtension::ReleaseToSystem(size_t num_bytes) {
-  // Default implementation does nothing
-}
-
-void MallocExtension::ReleaseFreeMemory() {
-  ReleaseToSystem(static_cast<size_t>(-1));   // SIZE_T_MAX
-}
-
-void MallocExtension::SetMemoryReleaseRate(double rate) {
-  // Default implementation does nothing
-}
-
-double MallocExtension::GetMemoryReleaseRate() {
-  return -1.0;
-}
-
-size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) {
-  return size;
-}
-
-size_t MallocExtension::GetAllocatedSize(const void* p) {
-  assert(GetOwnership(p) != kNotOwned);
-  return 0;
-}
-
-MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) {
-  return kUnknownOwnership;
-}
-
-void MallocExtension::GetFreeListSizes(
-    vector<MallocExtension::FreeListInfo>* v) {
-  v->clear();
-}
-
-size_t MallocExtension::GetThreadCacheSize() {
-  return 0;
-}
-
-void MallocExtension::MarkThreadTemporarilyIdle() {
-  // Default implementation does nothing
-}
-
-// The current malloc extension object.
-
-static MallocExtension* current_instance;
-
-static void InitModule() {
-  if (current_instance != NULL) {
-    return;
-  }
-  current_instance = new MallocExtension;
-#ifndef NO_HEAP_CHECK
-  HeapLeakChecker::IgnoreObject(current_instance);
-#endif
-}
-
-REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule())
-
-MallocExtension* MallocExtension::instance() {
-  InitModule();
-  return current_instance;
-}
-
-void MallocExtension::Register(MallocExtension* implementation) {
-  InitModule();
-  // When running under valgrind, our custom malloc is replaced with
-  // valgrind's one and malloc extensions will not work.  (Note:
-  // callers should be responsible for checking that they are the
-  // malloc that is really being run, before calling Register.  This
-  // is just here as an extra sanity check.)
-  if (!RunningOnValgrind()) {
-    current_instance = implementation;
-  }
-}
-
-// -----------------------------------------------------------------------
-// Heap sampling support
-// -----------------------------------------------------------------------
-
-namespace {
-
-// Accessors
-uintptr_t Count(void** entry) {
-  return reinterpret_cast<uintptr_t>(entry[0]);
-}
-uintptr_t Size(void** entry) {
-  return reinterpret_cast<uintptr_t>(entry[1]);
-}
-uintptr_t Depth(void** entry) {
-  return reinterpret_cast<uintptr_t>(entry[2]);
-}
-void* PC(void** entry, int i) {
-  return entry[3+i];
-}
-
-void PrintCountAndSize(MallocExtensionWriter* writer,
-                       uintptr_t count, uintptr_t size) {
-  char buf[100];
-  snprintf(buf, sizeof(buf),
-           "%6" PRIu64 ": %8" PRIu64 " [%6" PRIu64 ": %8" PRIu64 "] @",
-           static_cast<uint64>(count),
-           static_cast<uint64>(size),
-           static_cast<uint64>(count),
-           static_cast<uint64>(size));
-  writer->append(buf, strlen(buf));
-}
-
-void PrintHeader(MallocExtensionWriter* writer,
-                 const char* label, void** entries) {
-  // Compute the total count and total size
-  uintptr_t total_count = 0;
-  uintptr_t total_size = 0;
-  for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) {
-    total_count += Count(entry);
-    total_size += Size(entry);
-  }
-
-  const char* const kTitle = "heap profile: ";
-  writer->append(kTitle, strlen(kTitle));
-  PrintCountAndSize(writer, total_count, total_size);
-  writer->append(" ", 1);
-  writer->append(label, strlen(label));
-  writer->append("\n", 1);
-}
-
-void PrintStackEntry(MallocExtensionWriter* writer, void** entry) {
-  PrintCountAndSize(writer, Count(entry), Size(entry));
-
-  for (int i = 0; i < Depth(entry); i++) {
-    char buf[32];
-    snprintf(buf, sizeof(buf), " %p", PC(entry, i));
-    writer->append(buf, strlen(buf));
-  }
-  writer->append("\n", 1);
-}
-
-}
-
-void MallocExtension::GetHeapSample(MallocExtensionWriter* writer) {
-  int sample_period = 0;
-  void** entries = ReadStackTraces(&sample_period);
-  if (entries == NULL) {
-    const char* const kErrorMsg =
-        "This malloc implementation does not support sampling.\n"
-        "As of 2005/01/26, only tcmalloc supports sampling, and\n"
-        "you are probably running a binary that does not use\n"
-        "tcmalloc.\n";
-    writer->append(kErrorMsg, strlen(kErrorMsg));
-    return;
-  }
-
-  char label[32];
-  sprintf(label, "heap_v2/%d", sample_period);
-  PrintHeader(writer, label, entries);
-  for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) {
-    PrintStackEntry(writer, entry);
-  }
-  delete[] entries;
-
-  DumpAddressMap(writer);
-}
-
-void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) {
-  void** entries = ReadHeapGrowthStackTraces();
-  if (entries == NULL) {
-    const char* const kErrorMsg =
-        "This malloc implementation does not support "
-        "ReadHeapGrowthStackTraces().\n"
-        "As of 2005/09/27, only tcmalloc supports this, and you\n"
-        "are probably running a binary that does not use tcmalloc.\n";
-    writer->append(kErrorMsg, strlen(kErrorMsg));
-    return;
-  }
-
-  // Do not canonicalize the stack entries, so that we get a
-  // time-ordered list of stack traces, which may be useful if the
-  // client wants to focus on the latest stack traces.
-  PrintHeader(writer, "growth", entries);
-  for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) {
-    PrintStackEntry(writer, entry);
-  }
-  delete[] entries;
-
-  DumpAddressMap(writer);
-}
-
-void MallocExtension::Ranges(void* arg, RangeFunction func) {
-  // No callbacks by default
-}
-
-// These are C shims that work on the current instance.
-
-#define C_SHIM(fn, retval, paramlist, arglist)          \
-  extern "C" PERFTOOLS_DLL_DECL retval MallocExtension_##fn paramlist {    \
-    return MallocExtension::instance()->fn arglist;     \
-  }
-
-C_SHIM(VerifyAllMemory, int, (void), ());
-C_SHIM(VerifyNewMemory, int, (const void* p), (p));
-C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p));
-C_SHIM(VerifyMallocMemory, int, (const void* p), (p));
-C_SHIM(MallocMemoryStats, int,
-       (int* blocks, size_t* total, int histogram[kMallocHistogramSize]),
-       (blocks, total, histogram));
-
-C_SHIM(GetStats, void,
-       (char* buffer, int buffer_length), (buffer, buffer_length));
-C_SHIM(GetNumericProperty, int,
-       (const char* property, size_t* value), (property, value));
-C_SHIM(SetNumericProperty, int,
-       (const char* property, size_t value), (property, value));
-
-C_SHIM(MarkThreadIdle, void, (void), ());
-C_SHIM(MarkThreadBusy, void, (void), ());
-C_SHIM(ReleaseFreeMemory, void, (void), ());
-C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes));
-C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size));
-C_SHIM(GetAllocatedSize, size_t, (const void* p), (p));
-C_SHIM(GetThreadCacheSize, size_t, (void), ());
-C_SHIM(MarkThreadTemporarilyIdle, void, (void), ());
-
-// Can't use the shim here because of the need to translate the enums.
-extern "C"
-MallocExtension_Ownership MallocExtension_GetOwnership(const void* p) {
-  return static_cast<MallocExtension_Ownership>(
-      MallocExtension::instance()->GetOwnership(p));
-}
diff --git a/contrib/libtcmalloc/src/malloc_hook-inl.h b/contrib/libtcmalloc/src/malloc_hook-inl.h
deleted file mode 100644
index dbf4d46ed47..00000000000
--- a/contrib/libtcmalloc/src/malloc_hook-inl.h
+++ /dev/null
@@ -1,249 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// This has the implementation details of malloc_hook that are needed
-// to use malloc-hook inside the tcmalloc system.  It does not hold
-// any of the client-facing calls that are used to add new hooks.
-
-#ifndef _MALLOC_HOOK_INL_H_
-#define _MALLOC_HOOK_INL_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-#include "base/atomicops.h"
-#include "base/basictypes.h"
-#include <gperftools/malloc_hook.h>
-
-#include "common.h" // for UNLIKELY
-
-namespace base { namespace internal {
-
-// Capacity of 8 means that HookList is 9 words.
-static const int kHookListCapacity = 8;
-// last entry is reserved for deprecated "singular" hooks. So we have
-// 7 "normal" hooks per list
-static const int kHookListMaxValues = 7;
-static const int kHookListSingularIdx = 7;
-
-// HookList: a class that provides synchronized insertions and removals and
-// lockless traversal.  Most of the implementation is in malloc_hook.cc.
-template <typename T>
-struct PERFTOOLS_DLL_DECL HookList {
-  COMPILE_ASSERT(sizeof(T) <= sizeof(AtomicWord), T_should_fit_in_AtomicWord);
-
-  // Adds value to the list.  Note that duplicates are allowed.  Thread-safe and
-  // blocking (acquires hooklist_spinlock).  Returns true on success; false
-  // otherwise (failures include invalid value and no space left).
-  bool Add(T value);
-
-  void FixupPrivEndLocked();
-
-  // Removes the first entry matching value from the list.  Thread-safe and
-  // blocking (acquires hooklist_spinlock).  Returns true on success; false
-  // otherwise (failures include invalid value and no value found).
-  bool Remove(T value);
-
-  // Store up to n values of the list in output_array, and return the number of
-  // elements stored.  Thread-safe and non-blocking.  This is fast (one memory
-  // access) if the list is empty.
-  int Traverse(T* output_array, int n) const;
-
-  // Fast inline implementation for fast path of Invoke*Hook.
-  bool empty() const {
-    return base::subtle::NoBarrier_Load(&priv_end) == 0;
-  }
-
-  // Used purely to handle deprecated singular hooks
-  T GetSingular() const {
-    const AtomicWord *place = &priv_data[kHookListSingularIdx];
-    return bit_cast<T>(base::subtle::NoBarrier_Load(place));
-  }
-
-  T ExchangeSingular(T new_val);
-
-  // This internal data is not private so that the class is an aggregate and can
-  // be initialized by the linker.  Don't access this directly.  Use the
-  // INIT_HOOK_LIST macro in malloc_hook.cc.
-
-  // One more than the index of the last valid element in priv_data.  During
-  // 'Remove' this may be past the last valid element in priv_data, but
-  // subsequent values will be 0.
-  //
-  // Index kHookListCapacity-1 is reserved as 'deprecated' single hook pointer
-  AtomicWord priv_end;
-  AtomicWord priv_data[kHookListCapacity];
-};
-
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::NewHook> new_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::DeleteHook> delete_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreMmapHook> premmap_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapHook> mmap_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapReplacement> mmap_replacement_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapHook> munmap_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapReplacement> munmap_replacement_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MremapHook> mremap_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreSbrkHook> presbrk_hooks_;
-ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::SbrkHook> sbrk_hooks_;
-
-} }  // namespace base::internal
-
-// The following method is DEPRECATED
-inline MallocHook::NewHook MallocHook::GetNewHook() {
-  return base::internal::new_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeNewHook(const void* p, size_t s) {
-  if (UNLIKELY(!base::internal::new_hooks_.empty())) {
-    InvokeNewHookSlow(p, s);
-  }
-}
-
-// The following method is DEPRECATED
-inline MallocHook::DeleteHook MallocHook::GetDeleteHook() {
-  return base::internal::delete_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeDeleteHook(const void* p) {
-  if (UNLIKELY(!base::internal::delete_hooks_.empty())) {
-    InvokeDeleteHookSlow(p);
-  }
-}
-
-// The following method is DEPRECATED
-inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() {
-  return base::internal::premmap_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokePreMmapHook(const void* start,
-                                          size_t size,
-                                          int protection,
-                                          int flags,
-                                          int fd,
-                                          off_t offset) {
-  if (!base::internal::premmap_hooks_.empty()) {
-    InvokePreMmapHookSlow(start, size, protection, flags, fd, offset);
-  }
-}
-
-// The following method is DEPRECATED
-inline MallocHook::MmapHook MallocHook::GetMmapHook() {
-  return base::internal::mmap_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeMmapHook(const void* result,
-                                       const void* start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset) {
-  if (!base::internal::mmap_hooks_.empty()) {
-    InvokeMmapHookSlow(result, start, size, protection, flags, fd, offset);
-  }
-}
-
-inline bool MallocHook::InvokeMmapReplacement(const void* start,
-                                              size_t size,
-                                              int protection,
-                                              int flags,
-                                              int fd,
-                                              off_t offset,
-                                              void** result) {
-  if (!base::internal::mmap_replacement_.empty()) {
-    return InvokeMmapReplacementSlow(start, size,
-                                     protection, flags,
-                                     fd, offset,
-                                     result);
-  }
-  return false;
-}
-
-// The following method is DEPRECATED
-inline MallocHook::MunmapHook MallocHook::GetMunmapHook() {
-  return base::internal::munmap_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) {
-  if (!base::internal::munmap_hooks_.empty()) {
-    InvokeMunmapHookSlow(p, size);
-  }
-}
-
-inline bool MallocHook::InvokeMunmapReplacement(
-    const void* p, size_t size, int* result) {
-  if (!base::internal::mmap_replacement_.empty()) {
-    return InvokeMunmapReplacementSlow(p, size, result);
-  }
-  return false;
-}
-
-// The following method is DEPRECATED
-inline MallocHook::MremapHook MallocHook::GetMremapHook() {
-  return base::internal::mremap_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeMremapHook(const void* result,
-                                         const void* old_addr,
-                                         size_t old_size,
-                                         size_t new_size,
-                                         int flags,
-                                         const void* new_addr) {
-  if (!base::internal::mremap_hooks_.empty()) {
-    InvokeMremapHookSlow(result, old_addr, old_size, new_size, flags, new_addr);
-  }
-}
-
-// The following method is DEPRECATED
-inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() {
-  return base::internal::presbrk_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokePreSbrkHook(ptrdiff_t increment) {
-  if (!base::internal::presbrk_hooks_.empty() && increment != 0) {
-    InvokePreSbrkHookSlow(increment);
-  }
-}
-
-// The following method is DEPRECATED
-inline MallocHook::SbrkHook MallocHook::GetSbrkHook() {
-  return base::internal::sbrk_hooks_.GetSingular();
-}
-
-inline void MallocHook::InvokeSbrkHook(const void* result,
-                                       ptrdiff_t increment) {
-  if (!base::internal::sbrk_hooks_.empty() && increment != 0) {
-    InvokeSbrkHookSlow(result, increment);
-  }
-}
-
-#endif /* _MALLOC_HOOK_INL_H_ */
diff --git a/contrib/libtcmalloc/src/malloc_hook.cc b/contrib/libtcmalloc/src/malloc_hook.cc
deleted file mode 100644
index f87da8abbeb..00000000000
--- a/contrib/libtcmalloc/src/malloc_hook.cc
+++ /dev/null
@@ -1,703 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-
-// Disable the glibc prototype of mremap(), as older versions of the
-// system headers define this function with only four arguments,
-// whereas newer versions allow an optional fifth argument:
-#ifdef HAVE_MMAP
-# define mremap glibc_mremap
-# include <sys/mman.h>
-# undef mremap
-#endif
-
-#include <stddef.h>
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-#include <algorithm>
-#include "base/logging.h"
-#include "base/spinlock.h"
-#include "maybe_emergency_malloc.h"
-#include "maybe_threads.h"
-#include "malloc_hook-inl.h"
-#include <gperftools/malloc_hook.h>
-
-// This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
-// you're porting to a system where you really can't get a stacktrace.
-#ifdef NO_TCMALLOC_SAMPLES
-  // We use #define so code compiles even if you #include stacktrace.h somehow.
-# define GetStackTrace(stack, depth, skip)  (0)
-#else
-# include <gperftools/stacktrace.h>
-#endif
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-using std::copy;
-
-
-// Declaration of default weak initialization function, that can be overridden
-// by linking-in a strong definition (as heap-checker.cc does).  This is
-// extern "C" so that it doesn't trigger gold's --detect-odr-violations warning,
-// which only looks at C++ symbols.
-//
-// This function is declared here as weak, and defined later, rather than a more
-// straightforward simple weak definition, as a workround for an icc compiler
-// issue ((Intel reference 290819).  This issue causes icc to resolve weak
-// symbols too early, at compile rather than link time.  By declaring it (weak)
-// here, then defining it below after its use, we can avoid the problem.
-extern "C" {
-ATTRIBUTE_WEAK void MallocHook_InitAtFirstAllocation_HeapLeakChecker();
-}
-
-namespace {
-
-void RemoveInitialHooksAndCallInitializers();  // below.
-
-pthread_once_t once = PTHREAD_ONCE_INIT;
-
-// These hooks are installed in MallocHook as the only initial hooks.  The first
-// hook that is called will run RemoveInitialHooksAndCallInitializers (see the
-// definition below) and then redispatch to any malloc hooks installed by
-// RemoveInitialHooksAndCallInitializers.
-//
-// Note(llib): there is a possibility of a race in the event that there are
-// multiple threads running before the first allocation.  This is pretty
-// difficult to achieve, but if it is then multiple threads may concurrently do
-// allocations.  The first caller will call
-// RemoveInitialHooksAndCallInitializers via one of the initial hooks.  A
-// concurrent allocation may, depending on timing either:
-// * still have its initial malloc hook installed, run that and block on waiting
-//   for the first caller to finish its call to
-//   RemoveInitialHooksAndCallInitializers, and proceed normally.
-// * occur some time during the RemoveInitialHooksAndCallInitializers call, at
-//   which point there could be no initial hooks and the subsequent hooks that
-//   are about to be set up by RemoveInitialHooksAndCallInitializers haven't
-//   been installed yet.  I think the worst we can get is that some allocations
-//   will not get reported to some hooks set by the initializers called from
-//   RemoveInitialHooksAndCallInitializers.
-
-void InitialNewHook(const void* ptr, size_t size) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokeNewHook(ptr, size);
-}
-
-void InitialPreMMapHook(const void* start,
-                               size_t size,
-                               int protection,
-                               int flags,
-                               int fd,
-                               off_t offset) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokePreMmapHook(start, size, protection, flags, fd, offset);
-}
-
-void InitialPreSbrkHook(ptrdiff_t increment) {
-  perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers);
-  MallocHook::InvokePreSbrkHook(increment);
-}
-
-// This function is called at most once by one of the above initial malloc
-// hooks.  It removes all initial hooks and initializes all other clients that
-// want to get control at the very first memory allocation.  The initializers
-// may assume that the initial malloc hooks have been removed.  The initializers
-// may set up malloc hooks and allocate memory.
-void RemoveInitialHooksAndCallInitializers() {
-  RAW_CHECK(MallocHook::RemoveNewHook(&InitialNewHook), "");
-  RAW_CHECK(MallocHook::RemovePreMmapHook(&InitialPreMMapHook), "");
-  RAW_CHECK(MallocHook::RemovePreSbrkHook(&InitialPreSbrkHook), "");
-
-  // HeapLeakChecker is currently the only module that needs to get control on
-  // the first memory allocation, but one can add other modules by following the
-  // same weak/strong function pattern.
-  MallocHook_InitAtFirstAllocation_HeapLeakChecker();
-}
-
-}  // namespace
-
-// Weak default initialization function that must go after its use.
-extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
-  // Do nothing.
-}
-
-namespace base { namespace internal {
-
-// This lock is shared between all implementations of HookList::Add & Remove.
-// The potential for contention is very small.  This needs to be a SpinLock and
-// not a Mutex since it's possible for Mutex locking to allocate memory (e.g.,
-// per-thread allocation in debug builds), which could cause infinite recursion.
-static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED);
-
-template <typename T>
-bool HookList<T>::Add(T value_as_t) {
-  AtomicWord value = bit_cast<AtomicWord>(value_as_t);
-  if (value == 0) {
-    return false;
-  }
-  SpinLockHolder l(&hooklist_spinlock);
-  // Find the first slot in data that is 0.
-  int index = 0;
-  while ((index < kHookListMaxValues) &&
-         (base::subtle::NoBarrier_Load(&priv_data[index]) != 0)) {
-    ++index;
-  }
-  if (index == kHookListMaxValues) {
-    return false;
-  }
-  AtomicWord prev_num_hooks = base::subtle::Acquire_Load(&priv_end);
-  base::subtle::NoBarrier_Store(&priv_data[index], value);
-  if (prev_num_hooks <= index) {
-    base::subtle::NoBarrier_Store(&priv_end, index + 1);
-  }
-  return true;
-}
-
-template <typename T>
-void HookList<T>::FixupPrivEndLocked() {
-  AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end);
-  while ((hooks_end > 0) &&
-         (base::subtle::NoBarrier_Load(&priv_data[hooks_end - 1]) == 0)) {
-    --hooks_end;
-  }
-  base::subtle::NoBarrier_Store(&priv_end, hooks_end);
-}
-
-template <typename T>
-bool HookList<T>::Remove(T value_as_t) {
-  if (value_as_t == 0) {
-    return false;
-  }
-  SpinLockHolder l(&hooklist_spinlock);
-  AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end);
-  int index = 0;
-  while (index < hooks_end && value_as_t != bit_cast<T>(
-             base::subtle::NoBarrier_Load(&priv_data[index]))) {
-    ++index;
-  }
-  if (index == hooks_end) {
-    return false;
-  }
-  base::subtle::NoBarrier_Store(&priv_data[index], 0);
-  FixupPrivEndLocked();
-  return true;
-}
-
-template <typename T>
-int HookList<T>::Traverse(T* output_array, int n) const {
-  AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end);
-  int actual_hooks_end = 0;
-  for (int i = 0; i < hooks_end && n > 0; ++i) {
-    AtomicWord data = base::subtle::Acquire_Load(&priv_data[i]);
-    if (data != 0) {
-      *output_array++ = bit_cast<T>(data);
-      ++actual_hooks_end;
-      --n;
-    }
-  }
-  return actual_hooks_end;
-}
-
-template <typename T>
-T HookList<T>::ExchangeSingular(T value_as_t) {
-  AtomicWord value = bit_cast<AtomicWord>(value_as_t);
-  AtomicWord old_value;
-  SpinLockHolder l(&hooklist_spinlock);
-  old_value = base::subtle::NoBarrier_Load(&priv_data[kHookListSingularIdx]);
-  base::subtle::NoBarrier_Store(&priv_data[kHookListSingularIdx], value);
-  if (value != 0) {
-    base::subtle::NoBarrier_Store(&priv_end, kHookListSingularIdx + 1);
-  } else {
-    FixupPrivEndLocked();
-  }
-  return bit_cast<T>(old_value);
-}
-
-// Initialize a HookList (optionally with the given initial_value in index 0).
-#define INIT_HOOK_LIST { 0 }
-#define INIT_HOOK_LIST_WITH_VALUE(initial_value)                \
-  { 1, { reinterpret_cast<AtomicWord>(initial_value) } }
-
-// Explicit instantiation for malloc_hook_test.cc.  This ensures all the methods
-// are instantiated.
-template struct HookList<MallocHook::NewHook>;
-
-HookList<MallocHook::NewHook> new_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(&InitialNewHook);
-HookList<MallocHook::DeleteHook> delete_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::PreMmapHook> premmap_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(&InitialPreMMapHook);
-HookList<MallocHook::MmapHook> mmap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::MunmapHook> munmap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::MremapHook> mremap_hooks_ = INIT_HOOK_LIST;
-HookList<MallocHook::PreSbrkHook> presbrk_hooks_ =
-    INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook);
-HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST;
-
-// These lists contain either 0 or 1 hooks.
-HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 };
-HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 };
-
-#undef INIT_HOOK_LIST_WITH_VALUE
-#undef INIT_HOOK_LIST
-
-} }  // namespace base::internal
-
-using base::internal::kHookListMaxValues;
-using base::internal::new_hooks_;
-using base::internal::delete_hooks_;
-using base::internal::premmap_hooks_;
-using base::internal::mmap_hooks_;
-using base::internal::mmap_replacement_;
-using base::internal::munmap_hooks_;
-using base::internal::munmap_replacement_;
-using base::internal::mremap_hooks_;
-using base::internal::presbrk_hooks_;
-using base::internal::sbrk_hooks_;
-
-// These are available as C bindings as well as C++, hence their
-// definition outside the MallocHook class.
-extern "C"
-int MallocHook_AddNewHook(MallocHook_NewHook hook) {
-  RAW_VLOG(10, "AddNewHook(%p)", hook);
-  return new_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveNewHook(MallocHook_NewHook hook) {
-  RAW_VLOG(10, "RemoveNewHook(%p)", hook);
-  return new_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook) {
-  RAW_VLOG(10, "AddDeleteHook(%p)", hook);
-  return delete_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook) {
-  RAW_VLOG(10, "RemoveDeleteHook(%p)", hook);
-  return delete_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook) {
-  RAW_VLOG(10, "AddPreMmapHook(%p)", hook);
-  return premmap_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) {
-  RAW_VLOG(10, "RemovePreMmapHook(%p)", hook);
-  return premmap_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) {
-  RAW_VLOG(10, "SetMmapReplacement(%p)", hook);
-  // NOTE this is a best effort CHECK. Concurrent sets could succeed since
-  // this test is outside of the Add spin lock.
-  RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed.");
-  return mmap_replacement_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) {
-  RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook);
-  return mmap_replacement_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddMmapHook(MallocHook_MmapHook hook) {
-  RAW_VLOG(10, "AddMmapHook(%p)", hook);
-  return mmap_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook) {
-  RAW_VLOG(10, "RemoveMmapHook(%p)", hook);
-  return mmap_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook) {
-  RAW_VLOG(10, "AddMunmapHook(%p)", hook);
-  return munmap_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) {
-  RAW_VLOG(10, "RemoveMunmapHook(%p)", hook);
-  return munmap_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) {
-  RAW_VLOG(10, "SetMunmapReplacement(%p)", hook);
-  // NOTE this is a best effort CHECK. Concurrent sets could succeed since
-  // this test is outside of the Add spin lock.
-  RAW_CHECK(munmap_replacement_.empty(),
-            "Only one MunmapReplacement is allowed.");
-  return munmap_replacement_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) {
-  RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook);
-  return munmap_replacement_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddMremapHook(MallocHook_MremapHook hook) {
-  RAW_VLOG(10, "AddMremapHook(%p)", hook);
-  return mremap_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook) {
-  RAW_VLOG(10, "RemoveMremapHook(%p)", hook);
-  return mremap_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook) {
-  RAW_VLOG(10, "AddPreSbrkHook(%p)", hook);
-  return presbrk_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook) {
-  RAW_VLOG(10, "RemovePreSbrkHook(%p)", hook);
-  return presbrk_hooks_.Remove(hook);
-}
-
-extern "C"
-int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook) {
-  RAW_VLOG(10, "AddSbrkHook(%p)", hook);
-  return sbrk_hooks_.Add(hook);
-}
-
-extern "C"
-int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook) {
-  RAW_VLOG(10, "RemoveSbrkHook(%p)", hook);
-  return sbrk_hooks_.Remove(hook);
-}
-
-// The code below is DEPRECATED.
-extern "C"
-MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) {
-  RAW_VLOG(10, "SetNewHook(%p)", hook);
-  return new_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) {
-  RAW_VLOG(10, "SetDeleteHook(%p)", hook);
-  return delete_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) {
-  RAW_VLOG(10, "SetPreMmapHook(%p)", hook);
-  return premmap_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) {
-  RAW_VLOG(10, "SetMmapHook(%p)", hook);
-  return mmap_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) {
-  RAW_VLOG(10, "SetMunmapHook(%p)", hook);
-  return munmap_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) {
-  RAW_VLOG(10, "SetMremapHook(%p)", hook);
-  return mremap_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) {
-  RAW_VLOG(10, "SetPreSbrkHook(%p)", hook);
-  return presbrk_hooks_.ExchangeSingular(hook);
-}
-
-extern "C"
-MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) {
-  RAW_VLOG(10, "SetSbrkHook(%p)", hook);
-  return sbrk_hooks_.ExchangeSingular(hook);
-}
-// End of DEPRECATED code section.
-
-// Note: embedding the function calls inside the traversal of HookList would be
-// very confusing, as it is legal for a hook to remove itself and add other
-// hooks.  Doing traversal first, and then calling the hooks ensures we only
-// call the hooks registered at the start.
-#define INVOKE_HOOKS(HookType, hook_list, args) do {                    \
-    HookType hooks[kHookListMaxValues];                                 \
-    int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues);      \
-    for (int i = 0; i < num_hooks; ++i) {                               \
-      (*hooks[i])args;                                                  \
-    }                                                                   \
-  } while (0)
-
-// There should only be one replacement. Return the result of the first
-// one, or false if there is none.
-#define INVOKE_REPLACEMENT(HookType, hook_list, args) do {              \
-    HookType hooks[kHookListMaxValues];                                 \
-    int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues);      \
-    return (num_hooks > 0 && (*hooks[0])args);                          \
-  } while (0)
-
-
-void MallocHook::InvokeNewHookSlow(const void* p, size_t s) {
-  if (tcmalloc::IsEmergencyPtr(p)) {
-    return;
-  }
-  INVOKE_HOOKS(NewHook, new_hooks_, (p, s));
-}
-
-void MallocHook::InvokeDeleteHookSlow(const void* p) {
-  if (tcmalloc::IsEmergencyPtr(p)) {
-    return;
-  }
-  INVOKE_HOOKS(DeleteHook, delete_hooks_, (p));
-}
-
-void MallocHook::InvokePreMmapHookSlow(const void* start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset) {
-  INVOKE_HOOKS(PreMmapHook, premmap_hooks_, (start, size, protection, flags, fd,
-                                            offset));
-}
-
-void MallocHook::InvokeMmapHookSlow(const void* result,
-                                    const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset) {
-  INVOKE_HOOKS(MmapHook, mmap_hooks_, (result, start, size, protection, flags,
-                                       fd, offset));
-}
-
-bool MallocHook::InvokeMmapReplacementSlow(const void* start,
-                                           size_t size,
-                                           int protection,
-                                           int flags,
-                                           int fd,
-                                           off_t offset,
-                                           void** result) {
-  INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_,
-                      (start, size, protection, flags, fd, offset, result));
-}
-
-void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) {
-  INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s));
-}
-
-bool MallocHook::InvokeMunmapReplacementSlow(const void* p,
-                                             size_t s,
-                                             int* result) {
-  INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result));
-}
-
-void MallocHook::InvokeMremapHookSlow(const void* result,
-                                      const void* old_addr,
-                                      size_t old_size,
-                                      size_t new_size,
-                                      int flags,
-                                      const void* new_addr) {
-  INVOKE_HOOKS(MremapHook, mremap_hooks_, (result, old_addr, old_size, new_size,
-                                           flags, new_addr));
-}
-
-void MallocHook::InvokePreSbrkHookSlow(ptrdiff_t increment) {
-  INVOKE_HOOKS(PreSbrkHook, presbrk_hooks_, (increment));
-}
-
-void MallocHook::InvokeSbrkHookSlow(const void* result, ptrdiff_t increment) {
-  INVOKE_HOOKS(SbrkHook, sbrk_hooks_, (result, increment));
-}
-
-#undef INVOKE_HOOKS
-
-#ifndef NO_TCMALLOC_SAMPLES
-
-DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc);
-DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc);
-  // actual functions are in debugallocation.cc or tcmalloc.cc
-DEFINE_ATTRIBUTE_SECTION_VARS(malloc_hook);
-DECLARE_ATTRIBUTE_SECTION_VARS(malloc_hook);
-  // actual functions are in this file, malloc_hook.cc, and low_level_alloc.cc
-
-#define ADDR_IN_ATTRIBUTE_SECTION(addr, name) \
-  (reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_START(name)) <= \
-     reinterpret_cast<uintptr_t>(addr) && \
-   reinterpret_cast<uintptr_t>(addr) < \
-     reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_STOP(name)))
-
-// Return true iff 'caller' is a return address within a function
-// that calls one of our hooks via MallocHook:Invoke*.
-// A helper for GetCallerStackTrace.
-static inline bool InHookCaller(const void* caller) {
-  return ADDR_IN_ATTRIBUTE_SECTION(caller, google_malloc) ||
-         ADDR_IN_ATTRIBUTE_SECTION(caller, malloc_hook);
-  // We can use one section for everything except tcmalloc_or_debug
-  // due to its special linkage mode, which prevents merging of the sections.
-}
-
-#undef ADDR_IN_ATTRIBUTE_SECTION
-
-static bool checked_sections = false;
-
-static inline void CheckInHookCaller() {
-  if (!checked_sections) {
-    INIT_ATTRIBUTE_SECTION_VARS(google_malloc);
-    if (ATTRIBUTE_SECTION_START(google_malloc) ==
-        ATTRIBUTE_SECTION_STOP(google_malloc)) {
-      RAW_LOG(ERROR, "google_malloc section is missing, "
-                     "thus InHookCaller is broken!");
-    }
-    INIT_ATTRIBUTE_SECTION_VARS(malloc_hook);
-    if (ATTRIBUTE_SECTION_START(malloc_hook) ==
-        ATTRIBUTE_SECTION_STOP(malloc_hook)) {
-      RAW_LOG(ERROR, "malloc_hook section is missing, "
-                     "thus InHookCaller is broken!");
-    }
-    checked_sections = true;
-  }
-}
-
-#endif // !NO_TCMALLOC_SAMPLES
-
-// We can improve behavior/compactness of this function
-// if we pass a generic test function (with a generic arg)
-// into the implementations for GetStackTrace instead of the skip_count.
-extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth,
-                                              int skip_count) {
-#if defined(NO_TCMALLOC_SAMPLES)
-  return 0;
-#elif !defined(HAVE_ATTRIBUTE_SECTION_START)
-  // Fall back to GetStackTrace and good old but fragile frame skip counts.
-  // Note: this path is inaccurate when a hook is not called directly by an
-  // allocation function but is daisy-chained through another hook,
-  // search for MallocHook::(Get|Set|Invoke)* to find such cases.
-  return GetStackTrace(result, max_depth, skip_count + int(DEBUG_MODE));
-  // due to -foptimize-sibling-calls in opt mode
-  // there's no need for extra frame skip here then
-#else
-  CheckInHookCaller();
-  // MallocHook caller determination via InHookCaller works, use it:
-  static const int kMaxSkip = 32 + 6 + 3;
-    // Constant tuned to do just one GetStackTrace call below in practice
-    // and not get many frames that we don't actually need:
-    // currently max passsed max_depth is 32,
-    // max passed/needed skip_count is 6
-    // and 3 is to account for some hook daisy chaining.
-  static const int kStackSize = kMaxSkip + 1;
-  void* stack[kStackSize];
-  int depth = GetStackTrace(stack, kStackSize, 1);  // skip this function frame
-  if (depth == 0)   // silenty propagate cases when GetStackTrace does not work
-    return 0;
-  for (int i = 0; i < depth; ++i) {  // stack[0] is our immediate caller
-    if (InHookCaller(stack[i])) {
-      RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p",
-                   i, stack[i], stack[i+1]);
-      i += 1;  // skip hook caller frame
-      depth -= i;  // correct depth
-      if (depth > max_depth) depth = max_depth;
-      copy(stack + i, stack + i + depth, result);
-      if (depth < max_depth  &&  depth + i == kStackSize) {
-        // get frames for the missing depth
-        depth +=
-          GetStackTrace(result + depth, max_depth - depth, 1 + kStackSize);
-      }
-      return depth;
-    }
-  }
-  RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace");
-    // If this happens try increasing kMaxSkip
-    // or else something must be wrong with InHookCaller,
-    // e.g. for every section used in InHookCaller
-    // all functions in that section must be inside the same library.
-  return 0;
-#endif
-}
-
-// On systems where we know how, we override mmap/munmap/mremap/sbrk
-// to provide support for calling the related hooks (in addition,
-// of course, to doing what these functions normally do).
-
-#if defined(__linux)
-# include "malloc_hook_mmap_linux.h"
-
-#elif defined(__FreeBSD__)
-# include "malloc_hook_mmap_freebsd.h"
-
-#else
-
-/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
-                                         int flags, int fd, off_t offset) {
-  void* result;
-  if (!MallocHook::InvokeMmapReplacement(
-          start, length, prot, flags, fd, offset, &result)) {
-    result = mmap(start, length, prot, flags, fd, offset);
-  }
-  return result;
-}
-
-/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
-  int result;
-  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
-    result = munmap(start, length);
-  }
-  return result;
-}
-
-#endif
diff --git a/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h b/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h
deleted file mode 100644
index 8575dcc7c08..00000000000
--- a/contrib/libtcmalloc/src/malloc_hook_mmap_freebsd.h
+++ /dev/null
@@ -1,135 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Override mmap/munmap/mremap/sbrk to provide support for calling the
-// related hooks (in addition, of course, to doing what these
-// functions normally do).
-
-#ifndef __FreeBSD__
-# error Should only be including malloc_hook_mmap_freebsd.h on FreeBSD systems.
-#endif
-
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <sys/mman.h>
-#include <errno.h>
-#include <dlfcn.h>
-
-// Make sure mmap doesn't get #define'd away by <sys/mman.h>
-#undef mmap
-
-// According to the FreeBSD documentation, use syscall if you do not
-// need 64-bit alignment otherwise use __syscall. Indeed, syscall
-// doesn't work correctly in most situations on 64-bit. It's return
-// type is 'int' so for things like SYS_mmap, it actually truncates
-// the returned address to 32-bits.
-#if defined(__amd64__) || defined(__x86_64__)
-# define MALLOC_HOOK_SYSCALL __syscall
-#else
-# define MALLOC_HOOK_SYSCALL syscall
-#endif
-
-
-extern "C" {
-  void* mmap(void *start, size_t length,int prot, int flags,
-             int fd, off_t offset) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  int munmap(void* start, size_t length) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  void* sbrk(intptr_t increment) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-}
-
-static inline void* do_mmap(void *start, size_t length,
-                            int prot, int flags,
-                            int fd, off_t offset) __THROW {
-  return (void *)MALLOC_HOOK_SYSCALL(SYS_mmap,
-                                     start, length, prot, flags, fd, offset);
-}
-
-static inline void* do_sbrk(intptr_t increment) {
-  static void *(*libc_sbrk)(intptr_t);
-  if (libc_sbrk == NULL)
-    libc_sbrk = (void *(*)(intptr_t))dlsym(RTLD_NEXT, "sbrk");
-
-  return libc_sbrk(increment);
-}
-
-
-extern "C" void* mmap(void *start, size_t length, int prot, int flags,
-                      int fd, off_t offset) __THROW {
-  MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset);
-  void *result;
-  if (!MallocHook::InvokeMmapReplacement(
-          start, length, prot, flags, fd, offset, &result)) {
-    result = do_mmap(start, length, prot, flags, fd,
-                       static_cast<size_t>(offset)); // avoid sign extension
-  }
-  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
-  return result;
-}
-
-extern "C" int munmap(void* start, size_t length) __THROW {
-  MallocHook::InvokeMunmapHook(start, length);
-  int result;
-  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
-    result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length);
-  }
-
-  return result;
-}
-
-extern "C" void* sbrk(intptr_t increment) __THROW {
-  MallocHook::InvokePreSbrkHook(increment);
-  void *result = do_sbrk(increment);
-  MallocHook::InvokeSbrkHook(result, increment);
-  return result;
-}
-
-/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
-                                         int flags, int fd, off_t offset) {
-  void* result;
-  if (!MallocHook::InvokeMmapReplacement(
-	  start, length, prot, flags, fd, offset, &result)) {
-    result = do_mmap(start, length, prot, flags, fd, offset);
-  }
-
-  return result;
-}
-
-/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
-  int result;
-  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
-    result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length);
-  }
-  return result;
-}
-
-#undef MALLOC_HOOK_SYSCALL
diff --git a/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h b/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h
deleted file mode 100644
index 4b1386185bc..00000000000
--- a/contrib/libtcmalloc/src/malloc_hook_mmap_linux.h
+++ /dev/null
@@ -1,238 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-// We define mmap() and mmap64(), which somewhat reimplements libc's mmap
-// syscall stubs.  Unfortunately libc only exports the stubs via weak symbols
-// (which we're overriding with our mmap64() and mmap() wrappers) so we can't
-// just call through to them.
-
-#ifndef __linux
-# error Should only be including malloc_hook_mmap_linux.h on linux systems.
-#endif
-
-#include <unistd.h>
-#include <syscall.h>
-#include <sys/mman.h>
-#include <errno.h>
-#include "base/linux_syscall_support.h"
-
-// The x86-32 case and the x86-64 case differ:
-// 32b has a mmap2() syscall, 64b does not.
-// 64b and 32b have different calling conventions for mmap().
-
-// I test for 64-bit first so I don't have to do things like
-// '#if (defined(__mips__) && !defined(__MIPS64__))' as a mips32 check.
-#if defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__) || (defined(_MIPS_SIM) && _MIPS_SIM == _ABI64) || defined(__s390__)
-
-static inline void* do_mmap64(void *start, size_t length,
-                              int prot, int flags,
-                              int fd, __off64_t offset) __THROW {
-  return sys_mmap(start, length, prot, flags, fd, offset);
-}
-
-#define MALLOC_HOOK_HAVE_DO_MMAP64 1
-
-#elif defined(__i386__) || defined(__PPC__) || defined(__mips__) || \
-      defined(__arm__)
-
-static inline void* do_mmap64(void *start, size_t length,
-                              int prot, int flags,
-                              int fd, __off64_t offset) __THROW {
-  void *result;
-
-  // Try mmap2() unless it's not supported
-  static bool have_mmap2 = true;
-  if (have_mmap2) {
-    static int pagesize = 0;
-    if (!pagesize) pagesize = getpagesize();
-
-    // Check that the offset is page aligned
-    if (offset & (pagesize - 1)) {
-      result = MAP_FAILED;
-      errno = EINVAL;
-      goto out;
-    }
-
-    result = (void *)syscall(SYS_mmap2,
-                             start, length, prot, flags, fd,
-                             (off_t) (offset / pagesize));
-    if (result != MAP_FAILED || errno != ENOSYS)  goto out;
-
-    // We don't have mmap2() after all - don't bother trying it in future
-    have_mmap2 = false;
-  }
-
-  if (((off_t)offset) != offset) {
-    // If we're trying to map a 64-bit offset, fail now since we don't
-    // have 64-bit mmap() support.
-    result = MAP_FAILED;
-    errno = EINVAL;
-    goto out;
-  }
-
-#ifdef __NR_mmap
-  {
-    // Fall back to old 32-bit offset mmap() call
-    // Old syscall interface cannot handle six args, so pass in an array
-    int32 args[6] = { (int32) start, (int32) length, prot, flags, fd,
-                      (int32)(off_t) offset };
-    result = (void *)syscall(SYS_mmap, args);
-  }
-#else
-  // Some Linux ports like ARM EABI Linux has no mmap, just mmap2.
-  result = MAP_FAILED;
-#endif
-
- out:
-  return result;
-}
-
-#define MALLOC_HOOK_HAVE_DO_MMAP64 1
-
-#endif  // #if defined(__x86_64__)
-
-
-#ifdef MALLOC_HOOK_HAVE_DO_MMAP64
-
-// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook
-// calls right into mmap and mmap64, so that the stack frames in the caller's
-// stack are at the same offsets for all the calls of memory allocating
-// functions.
-
-// Put all callers of MallocHook::Invoke* in this module into
-// malloc_hook section,
-// so that MallocHook::GetCallerStackTrace can function accurately:
-
-// Make sure mmap doesn't get #define'd away by <sys/mman.h>
-# undef mmap
-
-extern "C" {
-  void* mmap64(void *start, size_t length, int prot, int flags,
-               int fd, __off64_t offset  ) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  void* mmap(void *start, size_t length,int prot, int flags,
-             int fd, off_t offset) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  int munmap(void* start, size_t length) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  void* mremap(void* old_addr, size_t old_size, size_t new_size,
-               int flags, ...) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-  void* sbrk(ptrdiff_t increment) __THROW
-    ATTRIBUTE_SECTION(malloc_hook);
-}
-
-extern "C" void* mmap64(void *start, size_t length, int prot, int flags,
-                        int fd, __off64_t offset) __THROW {
-  MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset);
-  void *result;
-  if (!MallocHook::InvokeMmapReplacement(
-          start, length, prot, flags, fd, offset, &result)) {
-    result = do_mmap64(start, length, prot, flags, fd, offset);
-  }
-  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
-  return result;
-}
-
-# if !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH)
-
-extern "C" void* mmap(void *start, size_t length, int prot, int flags,
-                      int fd, off_t offset) __THROW {
-  MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset);
-  void *result;
-  if (!MallocHook::InvokeMmapReplacement(
-          start, length, prot, flags, fd, offset, &result)) {
-    result = do_mmap64(start, length, prot, flags, fd,
-                       static_cast<size_t>(offset)); // avoid sign extension
-  }
-  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
-  return result;
-}
-
-# endif  // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH)
-
-extern "C" int munmap(void* start, size_t length) __THROW {
-  MallocHook::InvokeMunmapHook(start, length);
-  int result;
-  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
-    result = sys_munmap(start, length);
-  }
-  return result;
-}
-
-extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size,
-                        int flags, ...) __THROW {
-  va_list ap;
-  va_start(ap, flags);
-  void *new_address = va_arg(ap, void *);
-  va_end(ap);
-  void* result = sys_mremap(old_addr, old_size, new_size, flags, new_address);
-  MallocHook::InvokeMremapHook(result, old_addr, old_size, new_size, flags,
-                               new_address);
-  return result;
-}
-
-#ifndef __UCLIBC__
-// libc's version:
-extern "C" void* __sbrk(ptrdiff_t increment);
-
-extern "C" void* sbrk(ptrdiff_t increment) __THROW {
-  MallocHook::InvokePreSbrkHook(increment);
-  void *result = __sbrk(increment);
-  MallocHook::InvokeSbrkHook(result, increment);
-  return result;
-}
-
-#endif
-
-/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
-                                         int flags, int fd, off_t offset) {
-  void* result;
-  if (!MallocHook::InvokeMmapReplacement(
-          start, length, prot, flags, fd, offset, &result)) {
-    result = do_mmap64(start, length, prot, flags, fd, offset);
-  }
-  return result;
-}
-
-/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
-  int result;
-  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
-    result = syscall(SYS_munmap, start, length);
-  }
-  return result;
-}
-
-#undef MALLOC_HOOK_HAVE_DO_MMAP64
-
-#endif  // #ifdef MALLOC_HOOK_HAVE_DO_MMAP64
diff --git a/contrib/libtcmalloc/src/maybe_emergency_malloc.h b/contrib/libtcmalloc/src/maybe_emergency_malloc.h
deleted file mode 100644
index 250ecf01a3f..00000000000
--- a/contrib/libtcmalloc/src/maybe_emergency_malloc.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2014, gperftools Contributors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef MAYBE_EMERGENCY_MALLOC_H
-#define MAYBE_EMERGENCY_MALLOC_H
-
-#include "config.h"
-
-#ifdef ENABLE_EMERGENCY_MALLOC
-
-#include "emergency_malloc.h"
-
-#else
-
-namespace tcmalloc {
-  static inline void *EmergencyMalloc(size_t size) {return NULL;}
-  static inline void EmergencyFree(void *p) {}
-  static inline void *EmergencyCalloc(size_t n, size_t elem_size) {return NULL;}
-  static inline void *EmergencyRealloc(void *old_ptr, size_t new_size) {return NULL;}
-
-  static inline bool IsEmergencyPtr(const void *_ptr) {
-    return false;
-  }
-}
-
-#endif // ENABLE_EMERGENCY_MALLOC
-
-#endif
diff --git a/contrib/libtcmalloc/src/maybe_threads.cc b/contrib/libtcmalloc/src/maybe_threads.cc
deleted file mode 100644
index acfc99a5ae5..00000000000
--- a/contrib/libtcmalloc/src/maybe_threads.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Menage <opensource@google.com>
-//
-// Some wrappers for pthread functions so that we can be LD_PRELOADed
-// against non-pthreads apps.
-//
-// This module will behave very strangely if some pthreads functions
-// exist and others don't.
-
-#include "config.h"
-#include <assert.h>
-#include <string.h>    // for memcmp
-#include <stdio.h>     // for __isthreaded on FreeBSD
-// We don't actually need strings. But including this header seems to
-// stop the compiler trying to short-circuit our pthreads existence
-// tests and claiming that the address of a function is always
-// non-zero. I have no idea why ...
-#include <string>
-#include "maybe_threads.h"
-#include "base/basictypes.h"
-#include "base/logging.h"
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-// These are the methods we're going to conditionally include.
-extern "C" {
-  int pthread_key_create (pthread_key_t*, void (*)(void*))
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_key_delete (pthread_key_t)
-      __THROW ATTRIBUTE_WEAK;
-  void *pthread_getspecific(pthread_key_t)
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_setspecific(pthread_key_t, const void*)
-      __THROW ATTRIBUTE_WEAK;
-  int pthread_once(pthread_once_t *, void (*)(void))
-      ATTRIBUTE_WEAK;
-  int pthread_atfork(void (*__prepare) (void),
-                     void (*__parent) (void),
-                     void (*__child) (void))
-    __THROW ATTRIBUTE_WEAK;
-}
-
-#define MAX_PERTHREAD_VALS 16
-static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS];
-static int next_key;
-
-// NOTE: it's similar to bitcast defined in basic_types.h with
-// exception of ignoring sizes mismatch
-template <typename T1, typename T2>
-static T2 memcpy_cast(const T1 &input) {
-  T2 output;
-  size_t s = sizeof(input);
-  if (sizeof(output) < s) {
-    s = sizeof(output);
-  }
-  memcpy(&output, &input, s);
-  return output;
-}
-
-int perftools_pthread_key_create(pthread_key_t *key,
-                                 void (*destr_function) (void *)) {
-  if (pthread_key_create) {
-    return pthread_key_create(key, destr_function);
-  } else {
-    assert(next_key < MAX_PERTHREAD_VALS);
-    *key = memcpy_cast<int, pthread_key_t>(next_key++);
-    return 0;
-  }
-}
-
-int perftools_pthread_key_delete(pthread_key_t key) {
-  if (pthread_key_delete) {
-    return pthread_key_delete(key);
-  } else {
-    return 0;
-  }
-}
-
-void *perftools_pthread_getspecific(pthread_key_t key) {
-  if (pthread_getspecific) {
-    return pthread_getspecific(key);
-  } else {
-    return perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)];
-  }
-}
-
-int perftools_pthread_setspecific(pthread_key_t key, void *val) {
-  if (pthread_setspecific) {
-    return pthread_setspecific(key, val);
-  } else {
-    perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)] = val;
-    return 0;
-  }
-}
-
-
-static pthread_once_t pthread_once_init = PTHREAD_ONCE_INIT;
-int perftools_pthread_once(pthread_once_t *ctl,
-                           void  (*init_routine) (void)) {
-#ifdef __FreeBSD__
-  // On __FreeBSD__, calling pthread_once on a system that is not
-  // linked with -pthread is silently a noop. :-( Luckily, we have a
-  // workaround: FreeBSD exposes __isthreaded in <stdio.h>, which is
-  // set to 1 when the first thread is spawned.  So on those systems,
-  // we can use our own separate pthreads-once mechanism, which is
-  // used until __isthreaded is 1 (which will never be true if the app
-  // is not linked with -pthread).
-  static bool pthread_once_ran_before_threads = false;
-  if (pthread_once_ran_before_threads) {
-    return 0;
-  }
-  if (!__isthreaded) {
-    init_routine();
-    pthread_once_ran_before_threads = true;
-    return 0;
-  }
-#endif
-  if (pthread_once) {
-    return pthread_once(ctl, init_routine);
-  } else {
-    if (memcmp(ctl, &pthread_once_init, sizeof(*ctl)) == 0) {
-      init_routine();
-      ++*(char*)(ctl);        // make it so it's no longer equal to init
-    }
-    return 0;
-  }
-}
-
-void perftools_pthread_atfork(void (*before)(),
-                              void (*parent_after)(),
-                              void (*child_after)()) {
-  if (pthread_atfork) {
-    int rv = pthread_atfork(before, parent_after, child_after);
-    CHECK(rv == 0);
-  }
-}
diff --git a/contrib/libtcmalloc/src/maybe_threads.h b/contrib/libtcmalloc/src/maybe_threads.h
deleted file mode 100644
index c6cfdf7d158..00000000000
--- a/contrib/libtcmalloc/src/maybe_threads.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Menage <opensource@google.com>
-
-//-------------------------------------------------------------------
-// Some wrappers for pthread functions so that we can be LD_PRELOADed
-// against non-pthreads apps.
-//-------------------------------------------------------------------
-
-#ifndef GOOGLE_MAYBE_THREADS_H_
-#define GOOGLE_MAYBE_THREADS_H_
-
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-
-int perftools_pthread_key_create(pthread_key_t *key,
-                                 void (*destr_function) (void *));
-int perftools_pthread_key_delete(pthread_key_t key);
-void *perftools_pthread_getspecific(pthread_key_t key);
-int perftools_pthread_setspecific(pthread_key_t key, void *val);
-int perftools_pthread_once(pthread_once_t *ctl,
-                           void  (*init_routine) (void));
-
-// Our wrapper for pthread_atfork. Does _nothing_ when there are no
-// threads. See static_vars.cc:SetupAtForkLocksHandler for only user
-// of this.
-void perftools_pthread_atfork(void (*before)(),
-                              void (*parent_after)(),
-                              void (*child_after)());
-
-#endif  /* GOOGLE_MAYBE_THREADS_H_ */
diff --git a/contrib/libtcmalloc/src/memfs_malloc.cc b/contrib/libtcmalloc/src/memfs_malloc.cc
deleted file mode 100644
index 419ef24e43b..00000000000
--- a/contrib/libtcmalloc/src/memfs_malloc.cc
+++ /dev/null
@@ -1,272 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2007, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Arun Sharma
-//
-// A tcmalloc system allocator that uses a memory based filesystem such as
-// tmpfs or hugetlbfs
-//
-// Since these only exist on linux, we only register this allocator there.
-
-#ifdef __linux
-
-#include "config.h"
-#include <errno.h>                      // for errno, EINVAL
-#include <inttypes.h>                   // for PRId64
-#include <limits.h>                     // for PATH_MAX
-#include <stddef.h>                     // for size_t, NULL
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for int64_t, uintptr_t
-#endif
-#include <stdio.h>                      // for snprintf
-#include <stdlib.h>                     // for mkstemp
-#include <string.h>                     // for strerror
-#include <sys/mman.h>                   // for mmap, MAP_FAILED, etc
-#include <sys/statfs.h>                 // for fstatfs, statfs
-#include <unistd.h>                     // for ftruncate, off_t, unlink
-#include <new>                          // for operator new
-#include <string>
-
-#include <gperftools/malloc_extension.h>
-#include "base/basictypes.h"
-#include "base/googleinit.h"
-#include "base/sysinfo.h"
-#include "internal_logging.h"
-
-// TODO(sanjay): Move the code below into the tcmalloc namespace
-using tcmalloc::kLog;
-using tcmalloc::kCrash;
-using tcmalloc::Log;
-using std::string;
-
-DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""),
-              "Path where hugetlbfs or tmpfs is mounted. The caller is "
-              "responsible for ensuring that the path is unique and does "
-              "not conflict with another process");
-DEFINE_int64(memfs_malloc_limit_mb,
-             EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0),
-             "Limit total allocation size to the "
-             "specified number of MiB.  0 == no limit.");
-DEFINE_bool(memfs_malloc_abort_on_fail,
-            EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false),
-            "abort() whenever memfs_malloc fails to satisfy an allocation "
-            "for any reason.");
-DEFINE_bool(memfs_malloc_ignore_mmap_fail,
-            EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false),
-            "Ignore failures from mmap");
-DEFINE_bool(memfs_malloc_map_private,
-            EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false),
-	    "Use MAP_PRIVATE with mmap");
-
-// Hugetlbfs based allocator for tcmalloc
-class HugetlbSysAllocator: public SysAllocator {
-public:
-  explicit HugetlbSysAllocator(SysAllocator* fallback)
-    : failed_(true),  // To disable allocator until Initialize() is called.
-      big_page_size_(0),
-      hugetlb_fd_(-1),
-      hugetlb_base_(0),
-      fallback_(fallback) {
-  }
-
-  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
-  bool Initialize();
-
-  bool failed_;          // Whether failed to allocate memory.
-
-private:
-  void* AllocInternal(size_t size, size_t *actual_size, size_t alignment);
-
-  int64 big_page_size_;
-  int hugetlb_fd_;       // file descriptor for hugetlb
-  off_t hugetlb_base_;
-
-  SysAllocator* fallback_;  // Default system allocator to fall back to.
-};
-static union {
-  char buf[sizeof(HugetlbSysAllocator)];
-  void *ptr;
-} hugetlb_space;
-
-// No locking needed here since we assume that tcmalloc calls
-// us with an internal lock held (see tcmalloc/system-alloc.cc).
-void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size,
-                                 size_t alignment) {
-  if (failed_) {
-    return fallback_->Alloc(size, actual_size, alignment);
-  }
-
-  // We don't respond to allocation requests smaller than big_page_size_ unless
-  // the caller is ok to take more than they asked for. Used by MetaDataAlloc.
-  if (actual_size == NULL && size < big_page_size_) {
-    return fallback_->Alloc(size, actual_size, alignment);
-  }
-
-  // Enforce huge page alignment.  Be careful to deal with overflow.
-  size_t new_alignment = alignment;
-  if (new_alignment < big_page_size_) new_alignment = big_page_size_;
-  size_t aligned_size = ((size + new_alignment - 1) /
-                         new_alignment) * new_alignment;
-  if (aligned_size < size) {
-    return fallback_->Alloc(size, actual_size, alignment);
-  }
-
-  void* result = AllocInternal(aligned_size, actual_size, new_alignment);
-  if (result != NULL) {
-    return result;
-  }
-  Log(kLog, __FILE__, __LINE__,
-      "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_);
-  if (FLAGS_memfs_malloc_abort_on_fail) {
-    Log(kCrash, __FILE__, __LINE__,
-        "memfs_malloc_abort_on_fail is set");
-  }
-  return fallback_->Alloc(size, actual_size, alignment);
-}
-
-void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size,
-                                         size_t alignment) {
-  // Ask for extra memory if alignment > pagesize
-  size_t extra = 0;
-  if (alignment > big_page_size_) {
-    extra = alignment - big_page_size_;
-  }
-
-  // Test if this allocation would put us over the limit.
-  off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024;
-  if (limit > 0 && hugetlb_base_ + size + extra > limit) {
-    // Disable the allocator when there's less than one page left.
-    if (limit - hugetlb_base_ < big_page_size_) {
-      Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb");
-      failed_ = true;
-    }
-    else {
-      Log(kLog, __FILE__, __LINE__,
-          "alloc too large (size, bytes left)", size, limit-hugetlb_base_);
-    }
-    return NULL;
-  }
-
-  // This is not needed for hugetlbfs, but needed for tmpfs.  Annoyingly
-  // hugetlbfs returns EINVAL for ftruncate.
-  int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra);
-  if (ret != 0 && errno != EINVAL) {
-    Log(kLog, __FILE__, __LINE__,
-        "ftruncate failed", strerror(errno));
-    failed_ = true;
-    return NULL;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void *result;
-  result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
-                FLAGS_memfs_malloc_map_private ? MAP_PRIVATE : MAP_SHARED,
-                hugetlb_fd_, hugetlb_base_);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    if (!FLAGS_memfs_malloc_ignore_mmap_fail) {
-      Log(kLog, __FILE__, __LINE__,
-          "mmap failed (size, error)", size + extra, strerror(errno));
-      failed_ = true;
-    }
-    return NULL;
-  }
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-
-  // Adjust the return memory so it is aligned
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-  ptr += adjust;
-  hugetlb_base_ += (size + extra);
-
-  if (actual_size) {
-    *actual_size = size + extra - adjust;
-  }
-
-  return reinterpret_cast<void*>(ptr);
-}
-
-bool HugetlbSysAllocator::Initialize() {
-  char path[PATH_MAX];
-  const int pathlen = FLAGS_memfs_malloc_path.size();
-  if (pathlen + 8 > sizeof(path)) {
-    Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long");
-    return false;
-  }
-  memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen);
-  memcpy(path + pathlen, ".XXXXXX", 8);  // Also copies terminating \0
-
-  int hugetlb_fd = mkstemp(path);
-  if (hugetlb_fd == -1) {
-    Log(kLog, __FILE__, __LINE__,
-        "warning: unable to create memfs_malloc_path",
-        path, strerror(errno));
-    return false;
-  }
-
-  // Cleanup memory on process exit
-  if (unlink(path) == -1) {
-    Log(kCrash, __FILE__, __LINE__,
-        "fatal: error unlinking memfs_malloc_path", path, strerror(errno));
-    return false;
-  }
-
-  // Use fstatfs to figure out the default page size for memfs
-  struct statfs sfs;
-  if (fstatfs(hugetlb_fd, &sfs) == -1) {
-    Log(kCrash, __FILE__, __LINE__,
-        "fatal: error fstatfs of memfs_malloc_path", strerror(errno));
-    return false;
-  }
-  int64 page_size = sfs.f_bsize;
-
-  hugetlb_fd_ = hugetlb_fd;
-  big_page_size_ = page_size;
-  failed_ = false;
-  return true;
-}
-
-REGISTER_MODULE_INITIALIZER(memfs_malloc, {
-  if (FLAGS_memfs_malloc_path.length()) {
-    SysAllocator* alloc = MallocExtension::instance()->GetSystemAllocator();
-    HugetlbSysAllocator* hp =
-      new (hugetlb_space.buf) HugetlbSysAllocator(alloc);
-    if (hp->Initialize()) {
-      MallocExtension::instance()->SetSystemAllocator(hp);
-    }
-  }
-});
-
-#endif   /* ifdef __linux */
diff --git a/contrib/libtcmalloc/src/memory_region_map.cc b/contrib/libtcmalloc/src/memory_region_map.cc
deleted file mode 100644
index 841d6f3cf85..00000000000
--- a/contrib/libtcmalloc/src/memory_region_map.cc
+++ /dev/null
@@ -1,831 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-//
-// Background and key design points of MemoryRegionMap.
-//
-// MemoryRegionMap is a low-level module with quite atypical requirements that
-// result in some degree of non-triviality of the implementation and design.
-//
-// MemoryRegionMap collects info about *all* memory regions created with
-// mmap, munmap, mremap, sbrk.
-// They key word above is 'all': all that are happening in a process
-// during its lifetime frequently starting even before global object
-// constructor execution.
-//
-// This is needed by the primary client of MemoryRegionMap:
-// HeapLeakChecker uses the regions and the associated stack traces
-// to figure out what part of the memory is the heap:
-// if MemoryRegionMap were to miss some (early) regions, leak checking would
-// stop working correctly.
-//
-// To accomplish the goal of functioning before/during global object
-// constructor execution MemoryRegionMap is done as a singleton service
-// that relies on own on-demand initialized static constructor-less data,
-// and only relies on other low-level modules that can also function properly
-// even before global object constructors run.
-//
-// Accomplishing the goal of collecting data about all mmap, munmap, mremap,
-// sbrk occurrences is a more involved: conceptually to do this one needs to
-// record some bits of data in particular about any mmap or sbrk call,
-// but to do that one needs to allocate memory for that data at some point,
-// but all memory allocations in the end themselves come from an mmap
-// or sbrk call (that's how the address space of the process grows).
-//
-// Also note that we need to do all the above recording from
-// within an mmap/sbrk hook which is sometimes/frequently is made by a memory
-// allocator, including the allocator MemoryRegionMap itself must rely on.
-// In the case of heap-checker usage this includes even the very first
-// mmap/sbrk call happening in the program: heap-checker gets activated due to
-// a link-time installed mmap/sbrk hook and it initializes MemoryRegionMap
-// and asks it to record info about this very first call right from that
-// very first hook invocation.
-//
-// MemoryRegionMap is doing its memory allocations via LowLevelAlloc:
-// unlike more complex standard memory allocator, LowLevelAlloc cooperates with
-// MemoryRegionMap by not holding any of its own locks while it calls mmap
-// to get memory, thus we are able to call LowLevelAlloc from
-// our mmap/sbrk hooks without causing a deadlock in it.
-// For the same reason of deadlock prevention the locking in MemoryRegionMap
-// itself is write-recursive which is an exception to Google's mutex usage.
-//
-// We still need to break the infinite cycle of mmap calling our hook,
-// which asks LowLevelAlloc for memory to record this mmap,
-// which (sometimes) causes mmap, which calls our hook, and so on.
-// We do this as follows: on a recursive call of MemoryRegionMap's
-// mmap/sbrk/mremap hook we record the data about the allocation in a
-// static fixed-sized stack (saved_regions and saved_buckets), when the
-// recursion unwinds but before returning from the outer hook call we unwind
-// this stack and move the data from saved_regions and saved_buckets to its
-// permanent place in the RegionSet and "bucket_table" respectively,
-// which can cause more allocations and mmap-s and recursion and unwinding,
-// but the whole process ends eventually due to the fact that for the small
-// allocations we are doing LowLevelAlloc reuses one mmap call and parcels out
-// the memory it created to satisfy several of our allocation requests.
-//
-
-// ========================================================================= //
-
-#include <config.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#elif !defined(MAP_FAILED)
-#define MAP_FAILED -1  // the only thing we need from mman.h
-#endif
-#ifdef HAVE_PTHREAD
-#include <pthread.h>   // for pthread_t, pthread_self()
-#endif
-#include <stddef.h>
-
-#include <algorithm>
-#include <set>
-
-#include "memory_region_map.h"
-
-#include "base/googleinit.h"
-#include "base/logging.h"
-#include "base/low_level_alloc.h"
-#include "malloc_hook-inl.h"
-
-#include <gperftools/stacktrace.h>
-#include <gperftools/malloc_hook.h>
-
-// MREMAP_FIXED is a linux extension.  How it's used in this file,
-// setting it to 0 is equivalent to saying, "This feature isn't
-// supported", which is right.
-#ifndef MREMAP_FIXED
-# define MREMAP_FIXED  0
-#endif
-
-using std::max;
-
-// ========================================================================= //
-
-int MemoryRegionMap::client_count_ = 0;
-int MemoryRegionMap::max_stack_depth_ = 0;
-MemoryRegionMap::RegionSet* MemoryRegionMap::regions_ = NULL;
-LowLevelAlloc::Arena* MemoryRegionMap::arena_ = NULL;
-SpinLock MemoryRegionMap::lock_(SpinLock::LINKER_INITIALIZED);
-SpinLock MemoryRegionMap::owner_lock_(  // ACQUIRED_AFTER(lock_)
-    SpinLock::LINKER_INITIALIZED);
-int MemoryRegionMap::recursion_count_ = 0;  // GUARDED_BY(owner_lock_)
-pthread_t MemoryRegionMap::lock_owner_tid_;  // GUARDED_BY(owner_lock_)
-int64 MemoryRegionMap::map_size_ = 0;
-int64 MemoryRegionMap::unmap_size_ = 0;
-HeapProfileBucket** MemoryRegionMap::bucket_table_ = NULL;  // GUARDED_BY(lock_)
-int MemoryRegionMap::num_buckets_ = 0;  // GUARDED_BY(lock_)
-int MemoryRegionMap::saved_buckets_count_ = 0;  // GUARDED_BY(lock_)
-HeapProfileBucket MemoryRegionMap::saved_buckets_[20];  // GUARDED_BY(lock_)
-
-// GUARDED_BY(lock_)
-const void* MemoryRegionMap::saved_buckets_keys_[20][kMaxStackDepth];
-
-// ========================================================================= //
-
-// Simple hook into execution of global object constructors,
-// so that we do not call pthread_self() when it does not yet work.
-static bool libpthread_initialized = false;
-REGISTER_MODULE_INITIALIZER(libpthread_initialized_setter,
-                            libpthread_initialized = true);
-
-static inline bool current_thread_is(pthread_t should_be) {
-  // Before main() runs, there's only one thread, so we're always that thread
-  if (!libpthread_initialized) return true;
-  // this starts working only sometime well into global constructor execution:
-  return pthread_equal(pthread_self(), should_be);
-}
-
-// ========================================================================= //
-
-// Constructor-less place-holder to store a RegionSet in.
-union MemoryRegionMap::RegionSetRep {
-  char rep[sizeof(RegionSet)];
-  void* align_it;  // do not need a better alignment for 'rep' than this
-  RegionSet* region_set() { return reinterpret_cast<RegionSet*>(rep); }
-};
-
-// The bytes where MemoryRegionMap::regions_ will point to.
-// We use RegionSetRep with noop c-tor so that global construction
-// does not interfere.
-static MemoryRegionMap::RegionSetRep regions_rep;
-
-// ========================================================================= //
-
-// Has InsertRegionLocked been called recursively
-// (or rather should we *not* use regions_ to record a hooked mmap).
-static bool recursive_insert = false;
-
-void MemoryRegionMap::Init(int max_stack_depth, bool use_buckets) {
-  RAW_VLOG(10, "MemoryRegionMap Init");
-  RAW_CHECK(max_stack_depth >= 0, "");
-  // Make sure we don't overflow the memory in region stacks:
-  RAW_CHECK(max_stack_depth <= kMaxStackDepth,
-            "need to increase kMaxStackDepth?");
-  Lock();
-  client_count_ += 1;
-  max_stack_depth_ = max(max_stack_depth_, max_stack_depth);
-  if (client_count_ > 1) {
-    // not first client: already did initialization-proper
-    Unlock();
-    RAW_VLOG(10, "MemoryRegionMap Init increment done");
-    return;
-  }
-  // Set our hooks and make sure they were installed:
-  RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), "");
-  RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), "");
-  RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), "");
-  RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), "");
-  // We need to set recursive_insert since the NewArena call itself
-  // will already do some allocations with mmap which our hooks will catch
-  // recursive_insert allows us to buffer info about these mmap calls.
-  // Note that Init() can be (and is) sometimes called
-  // already from within an mmap/sbrk hook.
-  recursive_insert = true;
-  arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
-  recursive_insert = false;
-  HandleSavedRegionsLocked(&InsertRegionLocked);  // flush the buffered ones
-    // Can't instead use HandleSavedRegionsLocked(&DoInsertRegionLocked) before
-    // recursive_insert = false; as InsertRegionLocked will also construct
-    // regions_ on demand for us.
-  if (use_buckets) {
-    const int table_bytes = kHashTableSize * sizeof(*bucket_table_);
-    recursive_insert = true;
-    bucket_table_ = static_cast<HeapProfileBucket**>(
-        MyAllocator::Allocate(table_bytes));
-    recursive_insert = false;
-    memset(bucket_table_, 0, table_bytes);
-    num_buckets_ = 0;
-  }
-  Unlock();
-  RAW_VLOG(10, "MemoryRegionMap Init done");
-}
-
-bool MemoryRegionMap::Shutdown() {
-  RAW_VLOG(10, "MemoryRegionMap Shutdown");
-  Lock();
-  RAW_CHECK(client_count_ > 0, "");
-  client_count_ -= 1;
-  if (client_count_ != 0) {  // not last client; need not really shutdown
-    Unlock();
-    RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done");
-    return true;
-  }
-  if (bucket_table_ != NULL) {
-    for (int i = 0; i < kHashTableSize; i++) {
-      for (HeapProfileBucket* curr = bucket_table_[i]; curr != 0; /**/) {
-        HeapProfileBucket* bucket = curr;
-        curr = curr->next;
-        MyAllocator::Free(bucket->stack, 0);
-        MyAllocator::Free(bucket, 0);
-      }
-    }
-    MyAllocator::Free(bucket_table_, 0);
-    num_buckets_ = 0;
-    bucket_table_ = NULL;
-  }
-  RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), "");
-  RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), "");
-  RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), "");
-  RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), "");
-  if (regions_) regions_->~RegionSet();
-  regions_ = NULL;
-  bool deleted_arena = LowLevelAlloc::DeleteArena(arena_);
-  if (deleted_arena) {
-    arena_ = 0;
-  } else {
-    RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used");
-  }
-  Unlock();
-  RAW_VLOG(10, "MemoryRegionMap Shutdown done");
-  return deleted_arena;
-}
-
-bool MemoryRegionMap::IsRecordingLocked() {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  return client_count_ > 0;
-}
-
-// Invariants (once libpthread_initialized is true):
-//   * While lock_ is not held, recursion_count_ is 0 (and
-//     lock_owner_tid_ is the previous owner, but we don't rely on
-//     that).
-//   * recursion_count_ and lock_owner_tid_ are only written while
-//     both lock_ and owner_lock_ are held. They may be read under
-//     just owner_lock_.
-//   * At entry and exit of Lock() and Unlock(), the current thread
-//     owns lock_ iff pthread_equal(lock_owner_tid_, pthread_self())
-//     && recursion_count_ > 0.
-void MemoryRegionMap::Lock() {
-  {
-    SpinLockHolder l(&owner_lock_);
-    if (recursion_count_ > 0 && current_thread_is(lock_owner_tid_)) {
-      RAW_CHECK(lock_.IsHeld(), "Invariants violated");
-      recursion_count_++;
-      RAW_CHECK(recursion_count_ <= 5,
-                "recursive lock nesting unexpectedly deep");
-      return;
-    }
-  }
-  lock_.Lock();
-  {
-    SpinLockHolder l(&owner_lock_);
-    RAW_CHECK(recursion_count_ == 0,
-              "Last Unlock didn't reset recursion_count_");
-    if (libpthread_initialized)
-      lock_owner_tid_ = pthread_self();
-    recursion_count_ = 1;
-  }
-}
-
-void MemoryRegionMap::Unlock() {
-  SpinLockHolder l(&owner_lock_);
-  RAW_CHECK(recursion_count_ >  0, "unlock when not held");
-  RAW_CHECK(lock_.IsHeld(),
-            "unlock when not held, and recursion_count_ is wrong");
-  RAW_CHECK(current_thread_is(lock_owner_tid_), "unlock by non-holder");
-  recursion_count_--;
-  if (recursion_count_ == 0) {
-    lock_.Unlock();
-  }
-}
-
-bool MemoryRegionMap::LockIsHeld() {
-  SpinLockHolder l(&owner_lock_);
-  return lock_.IsHeld()  &&  current_thread_is(lock_owner_tid_);
-}
-
-const MemoryRegionMap::Region*
-MemoryRegionMap::DoFindRegionLocked(uintptr_t addr) {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  if (regions_ != NULL) {
-    Region sample;
-    sample.SetRegionSetKey(addr);
-    RegionSet::iterator region = regions_->lower_bound(sample);
-    if (region != regions_->end()) {
-      RAW_CHECK(addr <= region->end_addr, "");
-      if (region->start_addr <= addr  &&  addr < region->end_addr) {
-        return &(*region);
-      }
-    }
-  }
-  return NULL;
-}
-
-bool MemoryRegionMap::FindRegion(uintptr_t addr, Region* result) {
-  Lock();
-  const Region* region = DoFindRegionLocked(addr);
-  if (region != NULL) *result = *region;  // create it as an independent copy
-  Unlock();
-  return region != NULL;
-}
-
-bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top,
-                                             Region* result) {
-  Lock();
-  const Region* region = DoFindRegionLocked(stack_top);
-  if (region != NULL) {
-    RAW_VLOG(10, "Stack at %p is inside region %p..%p",
-                reinterpret_cast<void*>(stack_top),
-                reinterpret_cast<void*>(region->start_addr),
-                reinterpret_cast<void*>(region->end_addr));
-    const_cast<Region*>(region)->set_is_stack();  // now we know
-      // cast is safe (set_is_stack does not change the set ordering key)
-    *result = *region;  // create *result as an independent copy
-  }
-  Unlock();
-  return region != NULL;
-}
-
-HeapProfileBucket* MemoryRegionMap::GetBucket(int depth,
-                                              const void* const key[]) {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  // Make hash-value
-  uintptr_t hash = 0;
-  for (int i = 0; i < depth; i++) {
-    hash += reinterpret_cast<uintptr_t>(key[i]);
-    hash += hash << 10;
-    hash ^= hash >> 6;
-  }
-  hash += hash << 3;
-  hash ^= hash >> 11;
-
-  // Lookup stack trace in table
-  unsigned int hash_index = (static_cast<unsigned int>(hash)) % kHashTableSize;
-  for (HeapProfileBucket* bucket = bucket_table_[hash_index];
-       bucket != 0;
-       bucket = bucket->next) {
-    if ((bucket->hash == hash) && (bucket->depth == depth) &&
-        std::equal(key, key + depth, bucket->stack)) {
-      return bucket;
-    }
-  }
-
-  // Create new bucket
-  const size_t key_size = sizeof(key[0]) * depth;
-  HeapProfileBucket* bucket;
-  if (recursive_insert) {  // recursion: save in saved_buckets_
-    const void** key_copy = saved_buckets_keys_[saved_buckets_count_];
-    std::copy(key, key + depth, key_copy);
-    bucket = &saved_buckets_[saved_buckets_count_];
-    memset(bucket, 0, sizeof(*bucket));
-    ++saved_buckets_count_;
-    bucket->stack = key_copy;
-    bucket->next  = NULL;
-  } else {
-    recursive_insert = true;
-    const void** key_copy = static_cast<const void**>(
-        MyAllocator::Allocate(key_size));
-    recursive_insert = false;
-    std::copy(key, key + depth, key_copy);
-    recursive_insert = true;
-    bucket = static_cast<HeapProfileBucket*>(
-        MyAllocator::Allocate(sizeof(HeapProfileBucket)));
-    recursive_insert = false;
-    memset(bucket, 0, sizeof(*bucket));
-    bucket->stack = key_copy;
-    bucket->next  = bucket_table_[hash_index];
-  }
-  bucket->hash = hash;
-  bucket->depth = depth;
-  bucket_table_[hash_index] = bucket;
-  ++num_buckets_;
-  return bucket;
-}
-
-MemoryRegionMap::RegionIterator MemoryRegionMap::BeginRegionLocked() {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  RAW_CHECK(regions_ != NULL, "");
-  return regions_->begin();
-}
-
-MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  RAW_CHECK(regions_ != NULL, "");
-  return regions_->end();
-}
-
-inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) {
-  RAW_VLOG(12, "Inserting region %p..%p from %p",
-              reinterpret_cast<void*>(region.start_addr),
-              reinterpret_cast<void*>(region.end_addr),
-              reinterpret_cast<void*>(region.caller()));
-  RegionSet::const_iterator i = regions_->lower_bound(region);
-  if (i != regions_->end() && i->start_addr <= region.start_addr) {
-    RAW_DCHECK(region.end_addr <= i->end_addr, "");  // lower_bound ensures this
-    return;  // 'region' is a subset of an already recorded region; do nothing
-    // We can be stricter and allow this only when *i has been created via
-    // an mmap with MAP_NORESERVE flag set.
-  }
-  if (DEBUG_MODE) {
-    RAW_CHECK(i == regions_->end()  ||  !region.Overlaps(*i),
-              "Wow, overlapping memory regions");
-    Region sample;
-    sample.SetRegionSetKey(region.start_addr);
-    i = regions_->lower_bound(sample);
-    RAW_CHECK(i == regions_->end()  ||  !region.Overlaps(*i),
-              "Wow, overlapping memory regions");
-  }
-  region.AssertIsConsistent();  // just making sure
-  // This inserts and allocates permanent storage for region
-  // and its call stack data: it's safe to do it now:
-  regions_->insert(region);
-  RAW_VLOG(12, "Inserted region %p..%p :",
-              reinterpret_cast<void*>(region.start_addr),
-              reinterpret_cast<void*>(region.end_addr));
-  if (VLOG_IS_ON(12))  LogAllLocked();
-}
-
-// These variables are local to MemoryRegionMap::InsertRegionLocked()
-// and MemoryRegionMap::HandleSavedRegionsLocked()
-// and are file-level to ensure that they are initialized at load time.
-
-// Number of unprocessed region inserts.
-static int saved_regions_count = 0;
-
-// Unprocessed inserts (must be big enough to hold all allocations that can
-// be caused by a InsertRegionLocked call).
-// Region has no constructor, so that c-tor execution does not interfere
-// with the any-time use of the static memory behind saved_regions.
-static MemoryRegionMap::Region saved_regions[20];
-
-inline void MemoryRegionMap::HandleSavedRegionsLocked(
-              void (*insert_func)(const Region& region)) {
-  while (saved_regions_count > 0) {
-    // Making a local-var copy of the region argument to insert_func
-    // including its stack (w/o doing any memory allocations) is important:
-    // in many cases the memory in saved_regions
-    // will get written-to during the (*insert_func)(r) call below.
-    Region r = saved_regions[--saved_regions_count];
-    (*insert_func)(r);
-  }
-}
-
-void MemoryRegionMap::RestoreSavedBucketsLocked() {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  while (saved_buckets_count_ > 0) {
-    HeapProfileBucket bucket = saved_buckets_[--saved_buckets_count_];
-    unsigned int hash_index =
-        static_cast<unsigned int>(bucket.hash) % kHashTableSize;
-    bool is_found = false;
-    for (HeapProfileBucket* curr = bucket_table_[hash_index];
-         curr != 0;
-         curr = curr->next) {
-      if ((curr->hash == bucket.hash) && (curr->depth == bucket.depth) &&
-          std::equal(bucket.stack, bucket.stack + bucket.depth, curr->stack)) {
-        curr->allocs += bucket.allocs;
-        curr->alloc_size += bucket.alloc_size;
-        curr->frees += bucket.frees;
-        curr->free_size += bucket.free_size;
-        is_found = true;
-        break;
-      }
-    }
-    if (is_found) continue;
-
-    const size_t key_size = sizeof(bucket.stack[0]) * bucket.depth;
-    const void** key_copy = static_cast<const void**>(
-        MyAllocator::Allocate(key_size));
-    std::copy(bucket.stack, bucket.stack + bucket.depth, key_copy);
-    HeapProfileBucket* new_bucket = static_cast<HeapProfileBucket*>(
-        MyAllocator::Allocate(sizeof(HeapProfileBucket)));
-    memset(new_bucket, 0, sizeof(*new_bucket));
-    new_bucket->hash = bucket.hash;
-    new_bucket->depth = bucket.depth;
-    new_bucket->stack = key_copy;
-    new_bucket->next = bucket_table_[hash_index];
-    bucket_table_[hash_index] = new_bucket;
-    ++num_buckets_;
-  }
-}
-
-inline void MemoryRegionMap::InsertRegionLocked(const Region& region) {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  // We can be called recursively, because RegionSet constructor
-  // and DoInsertRegionLocked() (called below) can call the allocator.
-  // recursive_insert tells us if that's the case. When this happens,
-  // region insertion information is recorded in saved_regions[],
-  // and taken into account when the recursion unwinds.
-  // Do the insert:
-  if (recursive_insert) {  // recursion: save in saved_regions
-    RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p",
-                reinterpret_cast<void*>(region.start_addr),
-                reinterpret_cast<void*>(region.end_addr),
-                reinterpret_cast<void*>(region.caller()));
-    RAW_CHECK(saved_regions_count < arraysize(saved_regions), "");
-    // Copy 'region' to saved_regions[saved_regions_count]
-    // together with the contents of its call_stack,
-    // then increment saved_regions_count.
-    saved_regions[saved_regions_count++] = region;
-  } else {  // not a recusrive call
-    if (regions_ == NULL) {  // init regions_
-      RAW_VLOG(12, "Initializing region set");
-      regions_ = regions_rep.region_set();
-      recursive_insert = true;
-      new(regions_) RegionSet();
-      HandleSavedRegionsLocked(&DoInsertRegionLocked);
-      recursive_insert = false;
-    }
-    recursive_insert = true;
-    // Do the actual insertion work to put new regions into regions_:
-    DoInsertRegionLocked(region);
-    HandleSavedRegionsLocked(&DoInsertRegionLocked);
-    recursive_insert = false;
-  }
-}
-
-// We strip out different number of stack frames in debug mode
-// because less inlining happens in that case
-#ifdef NDEBUG
-static const int kStripFrames = 1;
-#else
-static const int kStripFrames = 3;
-#endif
-
-void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
-  // Record start/end info about this memory acquisition call in a new region:
-  Region region;
-  region.Create(start, size);
-  // First get the call stack info into the local varible 'region':
-  int depth = 0;
-  // NOTE: libunwind also does mmap and very much likely while holding
-  // it's own lock(s). So some threads may first take libunwind lock,
-  // and then take region map lock (necessary to record mmap done from
-  // inside libunwind). On the other hand other thread(s) may do
-  // normal mmap. Which would call this method to record it. Which
-  // would then proceed with installing that record to region map
-  // while holding region map lock. That may cause mmap from our own
-  // internal allocators, so attempt to unwind in this case may cause
-  // reverse order of taking libuwind and region map locks. Which is
-  // obvious deadlock.
-  //
-  // Thankfully, we can easily detect if we're holding region map lock
-  // and avoid recording backtrace in this (rare and largely
-  // irrelevant) case. By doing this we "declare" that thread needing
-  // both locks must take region map lock last. In other words we do
-  // not allow taking libuwind lock when we already have region map
-  // lock. Note, this is generally impossible when somebody tries to
-  // mix cpu profiling and heap checking/profiling, because cpu
-  // profiler grabs backtraces at arbitrary places. But at least such
-  // combination is rarer and less relevant.
-  if (max_stack_depth_ > 0 && !LockIsHeld()) {
-    depth = MallocHook::GetCallerStackTrace(const_cast<void**>(region.call_stack),
-                                            max_stack_depth_, kStripFrames + 1);
-  }
-  region.set_call_stack_depth(depth);  // record stack info fully
-  RAW_VLOG(10, "New global region %p..%p from %p",
-              reinterpret_cast<void*>(region.start_addr),
-              reinterpret_cast<void*>(region.end_addr),
-              reinterpret_cast<void*>(region.caller()));
-  // Note: none of the above allocates memory.
-  Lock();  // recursively lock
-  map_size_ += size;
-  InsertRegionLocked(region);
-    // This will (eventually) allocate storage for and copy over the stack data
-    // from region.call_stack_data_ that is pointed by region.call_stack().
-  if (bucket_table_ != NULL) {
-    HeapProfileBucket* b = GetBucket(depth, region.call_stack);
-    ++b->allocs;
-    b->alloc_size += size;
-    if (!recursive_insert) {
-      recursive_insert = true;
-      RestoreSavedBucketsLocked();
-      recursive_insert = false;
-    }
-  }
-  Unlock();
-}
-
-void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
-  Lock();
-  if (recursive_insert) {
-    // First remove the removed region from saved_regions, if it's
-    // there, to prevent overrunning saved_regions in recursive
-    // map/unmap call sequences, and also from later inserting regions
-    // which have already been unmapped.
-    uintptr_t start_addr = reinterpret_cast<uintptr_t>(start);
-    uintptr_t end_addr = start_addr + size;
-    int put_pos = 0;
-    int old_count = saved_regions_count;
-    for (int i = 0; i < old_count; ++i, ++put_pos) {
-      Region& r = saved_regions[i];
-      if (r.start_addr == start_addr && r.end_addr == end_addr) {
-        // An exact match, so it's safe to remove.
-        RecordRegionRemovalInBucket(r.call_stack_depth, r.call_stack, size);
-        --saved_regions_count;
-        --put_pos;
-        RAW_VLOG(10, ("Insta-Removing saved region %p..%p; "
-                     "now have %d saved regions"),
-                 reinterpret_cast<void*>(start_addr),
-                 reinterpret_cast<void*>(end_addr),
-                 saved_regions_count);
-      } else {
-        if (put_pos < i) {
-          saved_regions[put_pos] = saved_regions[i];
-        }
-      }
-    }
-  }
-  if (regions_ == NULL) {  // We must have just unset the hooks,
-                           // but this thread was already inside the hook.
-    Unlock();
-    return;
-  }
-  if (!recursive_insert) {
-    HandleSavedRegionsLocked(&InsertRegionLocked);
-  }
-    // first handle adding saved regions if any
-  uintptr_t start_addr = reinterpret_cast<uintptr_t>(start);
-  uintptr_t end_addr = start_addr + size;
-  // subtract start_addr, end_addr from all the regions
-  RAW_VLOG(10, "Removing global region %p..%p; have %" PRIuS " regions",
-              reinterpret_cast<void*>(start_addr),
-              reinterpret_cast<void*>(end_addr),
-              regions_->size());
-  Region sample;
-  sample.SetRegionSetKey(start_addr);
-  // Only iterate over the regions that might overlap start_addr..end_addr:
-  for (RegionSet::iterator region = regions_->lower_bound(sample);
-       region != regions_->end()  &&  region->start_addr < end_addr;
-       /*noop*/) {
-    RAW_VLOG(13, "Looking at region %p..%p",
-                reinterpret_cast<void*>(region->start_addr),
-                reinterpret_cast<void*>(region->end_addr));
-    if (start_addr <= region->start_addr  &&
-        region->end_addr <= end_addr) {  // full deletion
-      RAW_VLOG(12, "Deleting region %p..%p",
-                  reinterpret_cast<void*>(region->start_addr),
-                  reinterpret_cast<void*>(region->end_addr));
-      RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack,
-                                  region->end_addr - region->start_addr);
-      RegionSet::iterator d = region;
-      ++region;
-      regions_->erase(d);
-      continue;
-    } else if (region->start_addr < start_addr  &&
-               end_addr < region->end_addr) {  // cutting-out split
-      RAW_VLOG(12, "Splitting region %p..%p in two",
-                  reinterpret_cast<void*>(region->start_addr),
-                  reinterpret_cast<void*>(region->end_addr));
-      RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack,
-                                  end_addr - start_addr);
-      // Make another region for the start portion:
-      // The new region has to be the start portion because we can't
-      // just modify region->end_addr as it's the sorting key.
-      Region r = *region;
-      r.set_end_addr(start_addr);
-      InsertRegionLocked(r);
-      // cut *region from start:
-      const_cast<Region&>(*region).set_start_addr(end_addr);
-    } else if (end_addr > region->start_addr  &&
-               start_addr <= region->start_addr) {  // cut from start
-      RAW_VLOG(12, "Start-chopping region %p..%p",
-                  reinterpret_cast<void*>(region->start_addr),
-                  reinterpret_cast<void*>(region->end_addr));
-      RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack,
-                                  end_addr - region->start_addr);
-      const_cast<Region&>(*region).set_start_addr(end_addr);
-    } else if (start_addr > region->start_addr  &&
-               start_addr < region->end_addr) {  // cut from end
-      RAW_VLOG(12, "End-chopping region %p..%p",
-                  reinterpret_cast<void*>(region->start_addr),
-                  reinterpret_cast<void*>(region->end_addr));
-      RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack,
-                                  region->end_addr - start_addr);
-      // Can't just modify region->end_addr (it's the sorting key):
-      Region r = *region;
-      r.set_end_addr(start_addr);
-      RegionSet::iterator d = region;
-      ++region;
-      // It's safe to erase before inserting since r is independent of *d:
-      // r contains an own copy of the call stack:
-      regions_->erase(d);
-      InsertRegionLocked(r);
-      continue;
-    }
-    ++region;
-  }
-  RAW_VLOG(12, "Removed region %p..%p; have %" PRIuS " regions",
-              reinterpret_cast<void*>(start_addr),
-              reinterpret_cast<void*>(end_addr),
-              regions_->size());
-  if (VLOG_IS_ON(12))  LogAllLocked();
-  unmap_size_ += size;
-  Unlock();
-}
-
-void MemoryRegionMap::RecordRegionRemovalInBucket(int depth,
-                                                  const void* const stack[],
-                                                  size_t size) {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  if (bucket_table_ == NULL) return;
-  HeapProfileBucket* b = GetBucket(depth, stack);
-  ++b->frees;
-  b->free_size += size;
-}
-
-void MemoryRegionMap::MmapHook(const void* result,
-                               const void* start, size_t size,
-                               int prot, int flags,
-                               int fd, off_t offset) {
-  // TODO(maxim): replace all 0x%" PRIxS " by %p when RAW_VLOG uses a safe
-  // snprintf reimplementation that does not malloc to pretty-print NULL
-  RAW_VLOG(10, "MMap = 0x%" PRIxPTR " of %" PRIuS " at %" PRIu64 " "
-              "prot %d flags %d fd %d offs %" PRId64,
-              reinterpret_cast<uintptr_t>(result), size,
-              reinterpret_cast<uint64>(start), prot, flags, fd,
-              static_cast<int64>(offset));
-  if (result != reinterpret_cast<void*>(MAP_FAILED)  &&  size != 0) {
-    RecordRegionAddition(result, size);
-  }
-}
-
-void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) {
-  RAW_VLOG(10, "MUnmap of %p %" PRIuS "", ptr, size);
-  if (size != 0) {
-    RecordRegionRemoval(ptr, size);
-  }
-}
-
-void MemoryRegionMap::MremapHook(const void* result,
-                                 const void* old_addr, size_t old_size,
-                                 size_t new_size, int flags,
-                                 const void* new_addr) {
-  RAW_VLOG(10, "MRemap = 0x%" PRIxPTR " of 0x%" PRIxPTR " %" PRIuS " "
-              "to %" PRIuS " flags %d new_addr=0x%" PRIxPTR,
-              (uintptr_t)result, (uintptr_t)old_addr,
-               old_size, new_size, flags,
-               flags & MREMAP_FIXED ? (uintptr_t)new_addr : 0);
-  if (result != reinterpret_cast<void*>(-1)) {
-    RecordRegionRemoval(old_addr, old_size);
-    RecordRegionAddition(result, new_size);
-  }
-}
-
-void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) {
-  RAW_VLOG(10, "Sbrk = 0x%" PRIxPTR " of %" PRIdS "", (uintptr_t)result, increment);
-  if (result != reinterpret_cast<void*>(-1)) {
-    if (increment > 0) {
-      void* new_end = sbrk(0);
-      RecordRegionAddition(result, reinterpret_cast<uintptr_t>(new_end) -
-                                   reinterpret_cast<uintptr_t>(result));
-    } else if (increment < 0) {
-      void* new_end = sbrk(0);
-      RecordRegionRemoval(new_end, reinterpret_cast<uintptr_t>(result) -
-                                   reinterpret_cast<uintptr_t>(new_end));
-    }
-  }
-}
-
-void MemoryRegionMap::LogAllLocked() {
-  RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
-  RAW_LOG(INFO, "List of regions:");
-  uintptr_t previous = 0;
-  for (RegionSet::const_iterator r = regions_->begin();
-       r != regions_->end(); ++r) {
-    RAW_LOG(INFO, "Memory region 0x%" PRIxPTR "..0x%" PRIxPTR " "
-                  "from 0x%" PRIxPTR " stack=%d",
-                  r->start_addr, r->end_addr, r->caller(), r->is_stack);
-    RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order");
-      // this must be caused by uncontrolled recursive operations on regions_
-    previous = r->end_addr;
-  }
-  RAW_LOG(INFO, "End of regions list");
-}
diff --git a/contrib/libtcmalloc/src/memory_region_map.h b/contrib/libtcmalloc/src/memory_region_map.h
deleted file mode 100644
index ec388e1cc54..00000000000
--- a/contrib/libtcmalloc/src/memory_region_map.h
+++ /dev/null
@@ -1,413 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-/* Copyright (c) 2006, Google Inc.
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * 
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Maxim Lifantsev
- */
-
-#ifndef BASE_MEMORY_REGION_MAP_H_
-#define BASE_MEMORY_REGION_MAP_H_
-
-#include <config.h>
-
-#ifdef HAVE_PTHREAD
-#include <pthread.h>
-#endif
-#include <stddef.h>
-#include <set>
-#include "base/stl_allocator.h"
-#include "base/spinlock.h"
-#include "base/thread_annotations.h"
-#include "base/low_level_alloc.h"
-#include "heap-profile-stats.h"
-
-// TODO(maxim): add a unittest:
-//  execute a bunch of mmaps and compare memory map what strace logs
-//  execute a bunch of mmap/munmup and compare memory map with
-//  own accounting of what those mmaps generated
-
-// Thread-safe class to collect and query the map of all memory regions
-// in a process that have been created with mmap, munmap, mremap, sbrk.
-// For each memory region, we keep track of (and provide to users)
-// the stack trace that allocated that memory region.
-// The recorded stack trace depth is bounded by
-// a user-supplied max_stack_depth parameter of Init().
-// After initialization with Init()
-// (which can happened even before global object constructor execution)
-// we collect the map by installing and monitoring MallocHook-s
-// to mmap, munmap, mremap, sbrk.
-// At any time one can query this map via provided interface.
-// For more details on the design of MemoryRegionMap
-// see the comment at the top of our .cc file.
-class MemoryRegionMap {
- private:
-  // Max call stack recording depth supported by Init().  Set it to be
-  // high enough for all our clients.  Note: we do not define storage
-  // for this (doing that requires special handling in windows), so
-  // don't take the address of it!
-  static const int kMaxStackDepth = 32;
-
-  // Size of the hash table of buckets.  A structure of the bucket table is
-  // described in heap-profile-stats.h.
-  static const int kHashTableSize = 179999;
-
- public:
-  // interface ================================================================
-
-  // Every client of MemoryRegionMap must call Init() before first use,
-  // and Shutdown() after last use.  This allows us to reference count
-  // this (singleton) class properly.  MemoryRegionMap assumes it's the
-  // only client of MallocHooks, so a client can only register other
-  // MallocHooks after calling Init() and must unregister them before
-  // calling Shutdown().
-
-  // Initialize this module to record memory allocation stack traces.
-  // Stack traces that have more than "max_stack_depth" frames
-  // are automatically shrunk to "max_stack_depth" when they are recorded.
-  // Init() can be called more than once w/o harm, largest max_stack_depth
-  // will be the effective one.
-  // When "use_buckets" is true, then counts of mmap and munmap sizes will be
-  // recorded with each stack trace.  If Init() is called more than once, then
-  // counting will be effective after any call contained "use_buckets" of true.
-  // It will install mmap, munmap, mremap, sbrk hooks
-  // and initialize arena_ and our hook and locks, hence one can use
-  // MemoryRegionMap::Lock()/Unlock() to manage the locks.
-  // Uses Lock/Unlock inside.
-  static void Init(int max_stack_depth, bool use_buckets);
-
-  // Try to shutdown this module undoing what Init() did.
-  // Returns true iff could do full shutdown (or it was not attempted).
-  // Full shutdown is attempted when the number of Shutdown() calls equals
-  // the number of Init() calls.
-  static bool Shutdown();
-
-  // Return true if MemoryRegionMap is initialized and recording, i.e. when
-  // then number of Init() calls are more than the number of Shutdown() calls.
-  static bool IsRecordingLocked();
-
-  // Locks to protect our internal data structures.
-  // These also protect use of arena_ if our Init() has been done.
-  // The lock is recursive.
-  static void Lock() EXCLUSIVE_LOCK_FUNCTION(lock_);
-  static void Unlock() UNLOCK_FUNCTION(lock_);
-
-  // Returns true when the lock is held by this thread (for use in RAW_CHECK-s).
-  static bool LockIsHeld();
-
-  // Locker object that acquires the MemoryRegionMap::Lock
-  // for the duration of its lifetime (a C++ scope).
-  class LockHolder {
-   public:
-    LockHolder() { Lock(); }
-    ~LockHolder() { Unlock(); }
-   private:
-    DISALLOW_COPY_AND_ASSIGN(LockHolder);
-  };
-
-  // A memory region that we know about through malloc_hook-s.
-  // This is essentially an interface through which MemoryRegionMap
-  // exports the collected data to its clients.  Thread-compatible.
-  struct Region {
-    uintptr_t start_addr;  // region start address
-    uintptr_t end_addr;  // region end address
-    int call_stack_depth;  // number of caller stack frames that we saved
-    const void* call_stack[kMaxStackDepth];  // caller address stack array
-                                             // filled to call_stack_depth size
-    bool is_stack;  // does this region contain a thread's stack:
-                    // a user of MemoryRegionMap supplies this info
-
-    // Convenience accessor for call_stack[0],
-    // i.e. (the program counter of) the immediate caller
-    // of this region's allocation function,
-    // but it also returns NULL when call_stack_depth is 0,
-    // i.e whe we weren't able to get the call stack.
-    // This usually happens in recursive calls, when the stack-unwinder
-    // calls mmap() which in turn calls the stack-unwinder.
-    uintptr_t caller() const {
-      return reinterpret_cast<uintptr_t>(call_stack_depth >= 1
-                                         ? call_stack[0] : NULL);
-    }
-
-    // Return true iff this region overlaps region x.
-    bool Overlaps(const Region& x) const {
-      return start_addr < x.end_addr  &&  end_addr > x.start_addr;
-    }
-
-   private:  // helpers for MemoryRegionMap
-    friend class MemoryRegionMap;
-
-    // The ways we create Region-s:
-    void Create(const void* start, size_t size) {
-      start_addr = reinterpret_cast<uintptr_t>(start);
-      end_addr = start_addr + size;
-      is_stack = false;  // not a stack till marked such
-      call_stack_depth = 0;
-      AssertIsConsistent();
-    }
-    void set_call_stack_depth(int depth) {
-      RAW_DCHECK(call_stack_depth == 0, "");  // only one such set is allowed
-      call_stack_depth = depth;
-      AssertIsConsistent();
-    }
-
-    // The ways we modify Region-s:
-    void set_is_stack() { is_stack = true; }
-    void set_start_addr(uintptr_t addr) {
-      start_addr = addr;
-      AssertIsConsistent();
-    }
-    void set_end_addr(uintptr_t addr) {
-      end_addr = addr;
-      AssertIsConsistent();
-    }
-
-    // Verifies that *this contains consistent data, crashes if not the case.
-    void AssertIsConsistent() const {
-      RAW_DCHECK(start_addr < end_addr, "");
-      RAW_DCHECK(call_stack_depth >= 0  &&
-                 call_stack_depth <= kMaxStackDepth, "");
-    }
-
-    // Post-default construction helper to make a Region suitable
-    // for searching in RegionSet regions_.
-    void SetRegionSetKey(uintptr_t addr) {
-      // make sure *this has no usable data:
-      if (DEBUG_MODE) memset(this, 0xFF, sizeof(*this));
-      end_addr = addr;
-    }
-
-    // Note: call_stack[kMaxStackDepth] as a member lets us make Region
-    // a simple self-contained struct with correctly behaving bit-vise copying.
-    // This simplifies the code of this module but wastes some memory:
-    // in most-often use case of this module (leak checking)
-    // only one call_stack element out of kMaxStackDepth is actually needed.
-    // Making the storage for call_stack variable-sized,
-    // substantially complicates memory management for the Region-s:
-    // as they need to be created and manipulated for some time
-    // w/o any memory allocations, yet are also given out to the users.
-  };
-
-  // Find the region that covers addr and write its data into *result if found,
-  // in which case *result gets filled so that it stays fully functional
-  // even when the underlying region gets removed from MemoryRegionMap.
-  // Returns success. Uses Lock/Unlock inside.
-  static bool FindRegion(uintptr_t addr, Region* result);
-
-  // Find the region that contains stack_top, mark that region as
-  // a stack region, and write its data into *result if found,
-  // in which case *result gets filled so that it stays fully functional
-  // even when the underlying region gets removed from MemoryRegionMap.
-  // Returns success. Uses Lock/Unlock inside.
-  static bool FindAndMarkStackRegion(uintptr_t stack_top, Region* result);
-
-  // Iterate over the buckets which store mmap and munmap counts per stack
-  // trace.  It calls "callback" for each bucket, and passes "arg" to it.
-  template<class Type>
-  static void IterateBuckets(void (*callback)(const HeapProfileBucket*, Type),
-                             Type arg);
-
-  // Get the bucket whose caller stack trace is "key".  The stack trace is
-  // used to a depth of "depth" at most.  The requested bucket is created if
-  // needed.
-  // The bucket table is described in heap-profile-stats.h.
-  static HeapProfileBucket* GetBucket(int depth, const void* const key[]);
-
- private:  // our internal types ==============================================
-
-  // Region comparator for sorting with STL
-  struct RegionCmp {
-    bool operator()(const Region& x, const Region& y) const {
-      return x.end_addr < y.end_addr;
-    }
-  };
-
-  // We allocate STL objects in our own arena.
-  struct MyAllocator {
-    static void *Allocate(size_t n) {
-      return LowLevelAlloc::AllocWithArena(n, arena_);
-    }
-    static void Free(const void *p, size_t /* n */) {
-      LowLevelAlloc::Free(const_cast<void*>(p));
-    }
-  };
-
-  // Set of the memory regions
-  typedef std::set<Region, RegionCmp,
-              STL_Allocator<Region, MyAllocator> > RegionSet;
-
- public:  // more in-depth interface ==========================================
-
-  // STL iterator with values of Region
-  typedef RegionSet::const_iterator RegionIterator;
-
-  // Return the begin/end iterators to all the regions.
-  // These need Lock/Unlock protection around their whole usage (loop).
-  // Even when the same thread causes modifications during such a loop
-  // (which are permitted due to recursive locking)
-  // the loop iterator will still be valid as long as its region
-  // has not been deleted, but EndRegionLocked should be
-  // re-evaluated whenever the set of regions has changed.
-  static RegionIterator BeginRegionLocked();
-  static RegionIterator EndRegionLocked();
-
-  // Return the accumulated sizes of mapped and unmapped regions.
-  static int64 MapSize() { return map_size_; }
-  static int64 UnmapSize() { return unmap_size_; }
-
-  // Effectively private type from our .cc =================================
-  // public to let us declare global objects:
-  union RegionSetRep;
-
- private:
-  // representation ===========================================================
-
-  // Counter of clients of this module that have called Init().
-  static int client_count_;
-
-  // Maximal number of caller stack frames to save (>= 0).
-  static int max_stack_depth_;
-
-  // Arena used for our allocations in regions_.
-  static LowLevelAlloc::Arena* arena_;
-
-  // Set of the mmap/sbrk/mremap-ed memory regions
-  // To be accessed *only* when Lock() is held.
-  // Hence we protect the non-recursive lock used inside of arena_
-  // with our recursive Lock(). This lets a user prevent deadlocks
-  // when threads are stopped by TCMalloc_ListAllProcessThreads at random spots
-  // simply by acquiring our recursive Lock() before that.
-  static RegionSet* regions_;
-
-  // Lock to protect regions_ and buckets_ variables and the data behind.
-  static SpinLock lock_;
-  // Lock to protect the recursive lock itself.
-  static SpinLock owner_lock_;
-
-  // Recursion count for the recursive lock.
-  static int recursion_count_;
-  // The thread id of the thread that's inside the recursive lock.
-  static pthread_t lock_owner_tid_;
-
-  // Total size of all mapped pages so far
-  static int64 map_size_;
-  // Total size of all unmapped pages so far
-  static int64 unmap_size_;
-
-  // Bucket hash table which is described in heap-profile-stats.h.
-  static HeapProfileBucket** bucket_table_ GUARDED_BY(lock_);
-  static int num_buckets_ GUARDED_BY(lock_);
-
-  // The following members are local to MemoryRegionMap::GetBucket()
-  // and MemoryRegionMap::HandleSavedBucketsLocked()
-  // and are file-level to ensure that they are initialized at load time.
-  //
-  // These are used as temporary storage to break the infinite cycle of mmap
-  // calling our hook which (sometimes) causes mmap.  It must be a static
-  // fixed-size array.  The size 20 is just an expected value for safety.
-  // The details are described in memory_region_map.cc.
-
-  // Number of unprocessed bucket inserts.
-  static int saved_buckets_count_ GUARDED_BY(lock_);
-
-  // Unprocessed inserts (must be big enough to hold all mmaps that can be
-  // caused by a GetBucket call).
-  // Bucket has no constructor, so that c-tor execution does not interfere
-  // with the any-time use of the static memory behind saved_buckets.
-  static HeapProfileBucket saved_buckets_[20] GUARDED_BY(lock_);
-
-  static const void* saved_buckets_keys_[20][kMaxStackDepth] GUARDED_BY(lock_);
-
-  // helpers ==================================================================
-
-  // Helper for FindRegion and FindAndMarkStackRegion:
-  // returns the region covering 'addr' or NULL; assumes our lock_ is held.
-  static const Region* DoFindRegionLocked(uintptr_t addr);
-
-  // Verifying wrapper around regions_->insert(region)
-  // To be called to do InsertRegionLocked's work only!
-  inline static void DoInsertRegionLocked(const Region& region);
-  // Handle regions saved by InsertRegionLocked into a tmp static array
-  // by calling insert_func on them.
-  inline static void HandleSavedRegionsLocked(
-                       void (*insert_func)(const Region& region));
-
-  // Restore buckets saved in a tmp static array by GetBucket to the bucket
-  // table where all buckets eventually should be.
-  static void RestoreSavedBucketsLocked();
-
-  // Wrapper around DoInsertRegionLocked
-  // that handles the case of recursive allocator calls.
-  inline static void InsertRegionLocked(const Region& region);
-
-  // Record addition of a memory region at address "start" of size "size"
-  // (called from our mmap/mremap/sbrk hooks).
-  static void RecordRegionAddition(const void* start, size_t size);
-  // Record deletion of a memory region at address "start" of size "size"
-  // (called from our munmap/mremap/sbrk hooks).
-  static void RecordRegionRemoval(const void* start, size_t size);
-
-  // Record deletion of a memory region of size "size" in a bucket whose
-  // caller stack trace is "key".  The stack trace is used to a depth of
-  // "depth" at most.
-  static void RecordRegionRemovalInBucket(int depth,
-                                          const void* const key[],
-                                          size_t size);
-
-  // Hooks for MallocHook
-  static void MmapHook(const void* result,
-                       const void* start, size_t size,
-                       int prot, int flags,
-                       int fd, off_t offset);
-  static void MunmapHook(const void* ptr, size_t size);
-  static void MremapHook(const void* result, const void* old_addr,
-                         size_t old_size, size_t new_size, int flags,
-                         const void* new_addr);
-  static void SbrkHook(const void* result, ptrdiff_t increment);
-
-  // Log all memory regions; Useful for debugging only.
-  // Assumes Lock() is held
-  static void LogAllLocked();
-
-  DISALLOW_COPY_AND_ASSIGN(MemoryRegionMap);
-};
-
-template <class Type>
-void MemoryRegionMap::IterateBuckets(
-    void (*callback)(const HeapProfileBucket*, Type), Type callback_arg) {
-  for (int index = 0; index < kHashTableSize; index++) {
-    for (HeapProfileBucket* bucket = bucket_table_[index];
-         bucket != NULL;
-         bucket = bucket->next) {
-      callback(bucket, callback_arg);
-    }
-  }
-}
-
-#endif  // BASE_MEMORY_REGION_MAP_H_
diff --git a/contrib/libtcmalloc/src/packed-cache-inl.h b/contrib/libtcmalloc/src/packed-cache-inl.h
deleted file mode 100644
index 09462608ece..00000000000
--- a/contrib/libtcmalloc/src/packed-cache-inl.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Geoff Pike
-//
-// This file provides a minimal cache that can hold a <key, value> pair
-// with little if any wasted space.  The types of the key and value
-// must be unsigned integral types or at least have unsigned semantics
-// for >>, casting, and similar operations.
-//
-// Synchronization is not provided.  However, the cache is implemented
-// as an array of cache entries whose type is chosen at compile time.
-// If a[i] is atomic on your hardware for the chosen array type then
-// raciness will not necessarily lead to bugginess.  The cache entries
-// must be large enough to hold a partial key and a value packed
-// together.  The partial keys are bit strings of length
-// kKeybits - kHashbits, and the values are bit strings of length kValuebits.
-//
-// In an effort to use minimal space, every cache entry represents
-// some <key, value> pair; the class provides no way to mark a cache
-// entry as empty or uninitialized.  In practice, you may want to have
-// reserved keys or values to get around this limitation.  For example, in
-// tcmalloc's PageID-to-sizeclass cache, a value of 0 is used as
-// "unknown sizeclass."
-//
-// Usage Considerations
-// --------------------
-//
-// kHashbits controls the size of the cache.  The best value for
-// kHashbits will of course depend on the application.  Perhaps try
-// tuning the value of kHashbits by measuring different values on your
-// favorite benchmark.  Also remember not to be a pig; other
-// programs that need resources may suffer if you are.
-//
-// The main uses for this class will be when performance is
-// critical and there's a convenient type to hold the cache's
-// entries.  As described above, the number of bits required
-// for a cache entry is (kKeybits - kHashbits) + kValuebits.  Suppose
-// kKeybits + kValuebits is 43.  Then it probably makes sense to
-// chose kHashbits >= 11 so that cache entries fit in a uint32.
-//
-// On the other hand, suppose kKeybits = kValuebits = 64.  Then
-// using this class may be less worthwhile.  You'll probably
-// be using 128 bits for each entry anyway, so maybe just pick
-// a hash function, H, and use an array indexed by H(key):
-//    void Put(K key, V value) { a_[H(key)] = pair<K, V>(key, value); }
-//    V GetOrDefault(K key, V default) { const pair<K, V> &p = a_[H(key)]; ... }
-//    etc.
-//
-// Further Details
-// ---------------
-//
-// For caches used only by one thread, the following is true:
-// 1. For a cache c,
-//      (c.Put(key, value), c.GetOrDefault(key, 0)) == value
-//    and
-//      (c.Put(key, value), <...>, c.GetOrDefault(key, 0)) == value
-//    if the elided code contains no c.Put calls.
-//
-// 2. Has(key) will return false if no <key, value> pair with that key
-//    has ever been Put.  However, a newly initialized cache will have
-//    some <key, value> pairs already present.  When you create a new
-//    cache, you must specify an "initial value."  The initialization
-//    procedure is equivalent to Clear(initial_value), which is
-//    equivalent to Put(k, initial_value) for all keys k from 0 to
-//    2^kHashbits - 1.
-//
-// 3. If key and key' differ then the only way Put(key, value) may
-//    cause Has(key') to change is that Has(key') may change from true to
-//    false. Furthermore, a Put() call that doesn't change Has(key')
-//    doesn't change GetOrDefault(key', ...) either.
-//
-// Implementation details:
-//
-// This is a direct-mapped cache with 2^kHashbits entries; the hash
-// function simply takes the low bits of the key.  We store whole keys
-// if a whole key plus a whole value fits in an entry.  Otherwise, an
-// entry is the high bits of a key and a value, packed together.
-// E.g., a 20 bit key and a 7 bit value only require a uint16 for each
-// entry if kHashbits >= 11.
-//
-// Alternatives to this scheme will be added as needed.
-
-#ifndef TCMALLOC_PACKED_CACHE_INL_H_
-#define TCMALLOC_PACKED_CACHE_INL_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t
-#endif
-#include "base/basictypes.h"
-#include "internal_logging.h"
-
-// A safe way of doing "(1 << n) - 1" -- without worrying about overflow
-// Note this will all be resolved to a constant expression at compile-time
-#define N_ONES_(IntType, N)                                     \
-  ( (N) == 0 ? 0 : ((static_cast<IntType>(1) << ((N)-1))-1 +    \
-                    (static_cast<IntType>(1) << ((N)-1))) )
-
-// The types K and V provide upper bounds on the number of valid keys
-// and values, but we explicitly require the keys to be less than
-// 2^kKeybits and the values to be less than 2^kValuebits.  The size of
-// the table is controlled by kHashbits, and the type of each entry in
-// the cache is T.  See also the big comment at the top of the file.
-template <int kKeybits, typename T>
-class PackedCache {
- public:
-  typedef uintptr_t K;
-  typedef size_t V;
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  // Decrease the size map cache if running in the small memory mode.
-  static const int kHashbits = 12;
-#else
-  static const int kHashbits = 16;
-#endif
-  static const int kValuebits = 7;
-  static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T);
-
-  explicit PackedCache(V initial_value) {
-    COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size);
-    COMPILE_ASSERT(kValuebits <= sizeof(V) * 8, value_size);
-    COMPILE_ASSERT(kHashbits <= kKeybits, hash_function);
-    COMPILE_ASSERT(kKeybits - kHashbits + kValuebits <= kTbits,
-                   entry_size_must_be_big_enough);
-    Clear(initial_value);
-  }
-
-  void Put(K key, V value) {
-    ASSERT(key == (key & kKeyMask));
-    ASSERT(value == (value & kValueMask));
-    array_[Hash(key)] = KeyToUpper(key) | value;
-  }
-
-  bool Has(K key) const {
-    ASSERT(key == (key & kKeyMask));
-    return KeyMatch(array_[Hash(key)], key);
-  }
-
-  V GetOrDefault(K key, V default_value) const {
-    // As with other code in this class, we touch array_ as few times
-    // as we can.  Assuming entries are read atomically (e.g., their
-    // type is uintptr_t on most hardware) then certain races are
-    // harmless.
-    ASSERT(key == (key & kKeyMask));
-    T entry = array_[Hash(key)];
-    return KeyMatch(entry, key) ? EntryToValue(entry) : default_value;
-  }
-
-  void Clear(V value) {
-    ASSERT(value == (value & kValueMask));
-    for (int i = 0; i < 1 << kHashbits; i++) {
-      ASSERT(kUseWholeKeys || KeyToUpper(i) == 0);
-      array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value;
-    }
-  }
-
- private:
-  // We are going to pack a value and the upper part of a key (or a
-  // whole key) into an entry of type T.  The UPPER type is for the
-  // upper part of a key, after the key has been masked and shifted
-  // for inclusion in an entry.
-  typedef T UPPER;
-
-  static V EntryToValue(T t) { return t & kValueMask; }
-
-  // If we have space for a whole key, we just shift it left.
-  // Otherwise kHashbits determines where in a K to find the upper
-  // part of the key, and kValuebits determines where in the entry to
-  // put it.
-  static UPPER KeyToUpper(K k) {
-    if (kUseWholeKeys) {
-      return static_cast<T>(k) << kValuebits;
-    } else {
-      const int shift = kHashbits - kValuebits;
-      // Assume kHashbits >= kValuebits.  It'd be easy to lift this assumption.
-      return static_cast<T>(k >> shift) & kUpperMask;
-    }
-  }
-
-  static size_t Hash(K key) {
-    return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits);
-  }
-
-  // Does the entry match the relevant part of the given key?
-  static bool KeyMatch(T entry, K key) {
-    return kUseWholeKeys ?
-        (entry >> kValuebits == key) :
-        ((KeyToUpper(key) ^ entry) & kUpperMask) == 0;
-  }
-
-  static const int kTbits = 8 * sizeof(T);
-  static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits;
-
-  // For masking a K.
-  static const K kKeyMask = N_ONES_(K, kKeybits);
-
-  // For masking a T.
-  static const T kUpperMask = N_ONES_(T, kUpperbits) << kValuebits;
-
-  // For masking a V or a T.
-  static const V kValueMask = N_ONES_(V, kValuebits);
-
-  // array_ is the cache.  Its elements are volatile because any
-  // thread can write any array element at any time.
-  volatile T array_[1 << kHashbits];
-};
-
-#undef N_ONES_
-
-#endif  // TCMALLOC_PACKED_CACHE_INL_H_
diff --git a/contrib/libtcmalloc/src/page_heap.cc b/contrib/libtcmalloc/src/page_heap.cc
deleted file mode 100644
index f1915623308..00000000000
--- a/contrib/libtcmalloc/src/page_heap.cc
+++ /dev/null
@@ -1,682 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>                   // for PRIuPTR
-#endif
-#include <errno.h>                      // for ENOMEM, errno
-#include <gperftools/malloc_extension.h>      // for MallocRange, etc
-#include "base/basictypes.h"
-#include "base/commandlineflags.h"
-#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "static_vars.h"       // for Static
-#include "system-alloc.h"      // for TCMalloc_SystemAlloc, etc
-
-DEFINE_double(tcmalloc_release_rate,
-              EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0),
-              "Rate at which we release unused memory to the system.  "
-              "Zero means we never release memory back to the system.  "
-              "Increase this flag to return memory faster; decrease it "
-              "to return memory slower.  Reasonable rates are in the "
-              "range [0,10]");
-
-DEFINE_int64(tcmalloc_heap_limit_mb,
-              EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0),
-              "Limit total size of the process heap to the "
-              "specified number of MiB. "
-              "When we approach the limit the memory is released "
-              "to the system more aggressively (more minor page faults). "
-              "Zero means to allocate as long as system allows.");
-
-namespace tcmalloc {
-
-PageHeap::PageHeap()
-    : pagemap_(MetaDataAlloc),
-      pagemap_cache_(0),
-      scavenge_counter_(0),
-      // Start scavenging at kMaxPages list
-      release_index_(kMaxPages),
-      aggressive_decommit_(false) {
-  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
-  DLL_Init(&large_.normal);
-  DLL_Init(&large_.returned);
-  for (int i = 0; i < kMaxPages; i++) {
-    DLL_Init(&free_[i].normal);
-    DLL_Init(&free_[i].returned);
-  }
-}
-
-Span* PageHeap::SearchFreeAndLargeLists(Length n) {
-  ASSERT(Check());
-  ASSERT(n > 0);
-
-  // Find first size >= n that has a non-empty list
-  for (Length s = n; s < kMaxPages; s++) {
-    Span* ll = &free_[s].normal;
-    // If we're lucky, ll is non-empty, meaning it has a suitable span.
-    if (!DLL_IsEmpty(ll)) {
-      ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
-      return Carve(ll->next, n);
-    }
-    // Alternatively, maybe there's a usable returned span.
-    ll = &free_[s].returned;
-    if (!DLL_IsEmpty(ll)) {
-      // We did not call EnsureLimit before, to avoid releasing the span
-      // that will be taken immediately back.
-      // Calling EnsureLimit here is not very expensive, as it fails only if
-      // there is no more normal spans (and it fails efficiently)
-      // or SystemRelease does not work (there is probably no returned spans).
-      if (EnsureLimit(n)) {
-        // ll may have became empty due to coalescing
-        if (!DLL_IsEmpty(ll)) {
-          ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
-          return Carve(ll->next, n);
-        }
-      }
-    }
-  }
-  // No luck in free lists, our last chance is in a larger class.
-  return AllocLarge(n);  // May be NULL
-}
-
-static const size_t kForcedCoalesceInterval = 128*1024*1024;
-
-Span* PageHeap::New(Length n) {
-  ASSERT(Check());
-  ASSERT(n > 0);
-
-  Span* result = SearchFreeAndLargeLists(n);
-  if (result != NULL)
-    return result;
-
-  if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
-      && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
-      && (stats_.system_bytes / kForcedCoalesceInterval
-          != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
-    // We're about to grow heap, but there are lots of free pages.
-    // tcmalloc's design decision to keep unmapped and free spans
-    // separately and never coalesce them means that sometimes there
-    // can be free pages span of sufficient size, but it consists of
-    // "segments" of different type so page heap search cannot find
-    // it. In order to prevent growing heap and wasting memory in such
-    // case we're going to unmap all free pages. So that all free
-    // spans are maximally coalesced.
-    //
-    // We're also limiting 'rate' of going into this path to be at
-    // most once per 128 megs of heap growth. Otherwise programs that
-    // grow heap frequently (and that means by small amount) could be
-    // penalized with higher count of minor page faults.
-    //
-    // See also large_heap_fragmentation_unittest.cc and
-    // https://code.google.com/p/gperftools/issues/detail?id=368
-    ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
-
-    // then try again. If we are forced to grow heap because of large
-    // spans fragmentation and not because of problem described above,
-    // then at the very least we've just unmapped free but
-    // insufficiently big large spans back to OS. So in case of really
-    // unlucky memory fragmentation we'll be consuming virtual address
-    // space, but not real memory
-    result = SearchFreeAndLargeLists(n);
-    if (result != NULL) return result;
-  }
-
-  // Grow the heap and try again.
-  if (!GrowHeap(n)) {
-    ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
-    ASSERT(Check());
-    // underlying SysAllocator likely set ENOMEM but we can get here
-    // due to EnsureLimit so we set it here too.
-    //
-    // Setting errno to ENOMEM here allows us to avoid dealing with it
-    // in fast-path.
-    errno = ENOMEM;
-    return NULL;
-  }
-  return SearchFreeAndLargeLists(n);
-}
-
-Span* PageHeap::AllocLarge(Length n) {
-  // find the best span (closest to n in size).
-  // The following loops implements address-ordered best-fit.
-  Span *best = NULL;
-
-  // Search through normal list
-  for (Span* span = large_.normal.next;
-       span != &large_.normal;
-       span = span->next) {
-    if (span->length >= n) {
-      if ((best == NULL)
-          || (span->length < best->length)
-          || ((span->length == best->length) && (span->start < best->start))) {
-        best = span;
-        ASSERT(best->location == Span::ON_NORMAL_FREELIST);
-      }
-    }
-  }
-
-  Span *bestNormal = best;
-
-  // Search through released list in case it has a better fit
-  for (Span* span = large_.returned.next;
-       span != &large_.returned;
-       span = span->next) {
-    if (span->length >= n) {
-      if ((best == NULL)
-          || (span->length < best->length)
-          || ((span->length == best->length) && (span->start < best->start))) {
-        best = span;
-        ASSERT(best->location == Span::ON_RETURNED_FREELIST);
-      }
-    }
-  }
-
-  if (best == bestNormal) {
-    return best == NULL ? NULL : Carve(best, n);
-  }
-
-  // best comes from returned list.
-
-  if (EnsureLimit(n, false)) {
-    return Carve(best, n);
-  }
-
-  if (EnsureLimit(n, true)) {
-    // best could have been destroyed by coalescing.
-    // bestNormal is not a best-fit, and it could be destroyed as well.
-    // We retry, the limit is already ensured:
-    return AllocLarge(n);
-  }
-
-  // If bestNormal existed, EnsureLimit would succeeded:
-  ASSERT(bestNormal == NULL);
-  // We are not allowed to take best from returned list.
-  return NULL;
-}
-
-Span* PageHeap::Split(Span* span, Length n) {
-  ASSERT(0 < n);
-  ASSERT(n < span->length);
-  ASSERT(span->location == Span::IN_USE);
-  ASSERT(span->sizeclass == 0);
-  Event(span, 'T', n);
-
-  const int extra = span->length - n;
-  Span* leftover = NewSpan(span->start + n, extra);
-  ASSERT(leftover->location == Span::IN_USE);
-  Event(leftover, 'U', extra);
-  RecordSpan(leftover);
-  pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
-  span->length = n;
-
-  return leftover;
-}
-
-void PageHeap::CommitSpan(Span* span) {
-  TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift),
-                        static_cast<size_t>(span->length << kPageShift));
-  stats_.committed_bytes += span->length << kPageShift;
-}
-
-bool PageHeap::DecommitSpan(Span* span) {
-  bool rv = TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift),
-                                   static_cast<size_t>(span->length << kPageShift));
-  if (rv) {
-    stats_.committed_bytes -= span->length << kPageShift;
-  }
-
-  return rv;
-}
-
-Span* PageHeap::Carve(Span* span, Length n) {
-  ASSERT(n > 0);
-  ASSERT(span->location != Span::IN_USE);
-  const int old_location = span->location;
-  RemoveFromFreeList(span);
-  span->location = Span::IN_USE;
-  Event(span, 'A', n);
-
-  const int extra = span->length - n;
-  ASSERT(extra >= 0);
-  if (extra > 0) {
-    Span* leftover = NewSpan(span->start + n, extra);
-    leftover->location = old_location;
-    Event(leftover, 'S', extra);
-    RecordSpan(leftover);
-
-    // The previous span of |leftover| was just splitted -- no need to
-    // coalesce them. The next span of |leftover| was not previously coalesced
-    // with |span|, i.e. is NULL or has got location other than |old_location|.
-#ifndef NDEBUG
-    const PageID p = leftover->start;
-    const Length len = leftover->length;
-    Span* next = GetDescriptor(p+len);
-    ASSERT (next == NULL ||
-            next->location == Span::IN_USE ||
-            next->location != leftover->location);
-#endif
-
-    PrependToFreeList(leftover);  // Skip coalescing - no candidates possible
-    span->length = n;
-    pagemap_.set(span->start + n - 1, span);
-  }
-  ASSERT(Check());
-  if (old_location == Span::ON_RETURNED_FREELIST) {
-    // We need to recommit this address space.
-    CommitSpan(span);
-  }
-  ASSERT(span->location == Span::IN_USE);
-  ASSERT(span->length == n);
-  ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
-  return span;
-}
-
-void PageHeap::Delete(Span* span) {
-  ASSERT(Check());
-  ASSERT(span->location == Span::IN_USE);
-  ASSERT(span->length > 0);
-  ASSERT(GetDescriptor(span->start) == span);
-  ASSERT(GetDescriptor(span->start + span->length - 1) == span);
-  const Length n = span->length;
-  span->sizeclass = 0;
-  span->sample = 0;
-  span->location = Span::ON_NORMAL_FREELIST;
-  Event(span, 'D', span->length);
-  MergeIntoFreeList(span);  // Coalesces if possible
-  IncrementalScavenge(n);
-  ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
-  ASSERT(Check());
-}
-
-bool PageHeap::MayMergeSpans(Span *span, Span *other) {
-  if (aggressive_decommit_) {
-    return other->location != Span::IN_USE;
-  }
-  return span->location == other->location;
-}
-
-void PageHeap::MergeIntoFreeList(Span* span) {
-  ASSERT(span->location != Span::IN_USE);
-
-  // Coalesce -- we guarantee that "p" != 0, so no bounds checking
-  // necessary.  We do not bother resetting the stale pagemap
-  // entries for the pieces we are merging together because we only
-  // care about the pagemap entries for the boundaries.
-  //
-  // Note: depending on aggressive_decommit_ mode we allow only
-  // similar spans to be coalesced.
-  //
-  // The following applies if aggressive_decommit_ is enabled:
-  //
-  // Note that the adjacent spans we merge into "span" may come out of a
-  // "normal" (committed) list, and cleanly merge with our IN_USE span, which
-  // is implicitly committed.  If the adjacents spans are on the "returned"
-  // (decommitted) list, then we must get both spans into the same state before
-  // or after we coalesce them.  The current code always decomits. This is
-  // achieved by blindly decommitting the entire coalesced region, which  may
-  // include any combination of committed and decommitted spans, at the end of
-  // the method.
-
-  // TODO(jar): "Always decommit" causes some extra calls to commit when we are
-  // called in GrowHeap() during an allocation :-/.  We need to eval the cost of
-  // that oscillation, and possibly do something to reduce it.
-
-  // TODO(jar): We need a better strategy for deciding to commit, or decommit,
-  // based on memory usage and free heap sizes.
-
-  uint64_t temp_committed = 0;
-
-  const PageID p = span->start;
-  const Length n = span->length;
-  Span* prev = GetDescriptor(p-1);
-  if (prev != NULL && MayMergeSpans(span, prev)) {
-    // Merge preceding span into this span
-    ASSERT(prev->start + prev->length == p);
-    const Length len = prev->length;
-    if (aggressive_decommit_ && prev->location == Span::ON_RETURNED_FREELIST) {
-      // We're about to put the merge span into the returned freelist and call
-      // DecommitSpan() on it, which will mark the entire span including this
-      // one as released and decrease stats_.committed_bytes by the size of the
-      // merged span.  To make the math work out we temporarily increase the
-      // stats_.committed_bytes amount.
-      temp_committed = prev->length << kPageShift;
-    }
-    RemoveFromFreeList(prev);
-    DeleteSpan(prev);
-    span->start -= len;
-    span->length += len;
-    pagemap_.set(span->start, span);
-    Event(span, 'L', len);
-  }
-  Span* next = GetDescriptor(p+n);
-  if (next != NULL && MayMergeSpans(span, next)) {
-    // Merge next span into this span
-    ASSERT(next->start == p+n);
-    const Length len = next->length;
-    if (aggressive_decommit_ && next->location == Span::ON_RETURNED_FREELIST) {
-      // See the comment below 'if (prev->location ...' for explanation.
-      temp_committed += next->length << kPageShift;
-    }
-    RemoveFromFreeList(next);
-    DeleteSpan(next);
-    span->length += len;
-    pagemap_.set(span->start + span->length - 1, span);
-    Event(span, 'R', len);
-  }
-
-  if (aggressive_decommit_) {
-    if (DecommitSpan(span)) {
-      span->location = Span::ON_RETURNED_FREELIST;
-      stats_.committed_bytes += temp_committed;
-    } else {
-      ASSERT(temp_committed == 0);
-    }
-  }
-  PrependToFreeList(span);
-}
-
-void PageHeap::PrependToFreeList(Span* span) {
-  ASSERT(span->location != Span::IN_USE);
-  SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_;
-  if (span->location == Span::ON_NORMAL_FREELIST) {
-    stats_.free_bytes += (span->length << kPageShift);
-    DLL_Prepend(&list->normal, span);
-  } else {
-    stats_.unmapped_bytes += (span->length << kPageShift);
-    DLL_Prepend(&list->returned, span);
-  }
-}
-
-void PageHeap::RemoveFromFreeList(Span* span) {
-  ASSERT(span->location != Span::IN_USE);
-  if (span->location == Span::ON_NORMAL_FREELIST) {
-    stats_.free_bytes -= (span->length << kPageShift);
-  } else {
-    stats_.unmapped_bytes -= (span->length << kPageShift);
-  }
-  DLL_Remove(span);
-}
-
-void PageHeap::IncrementalScavenge(Length n) {
-  // Fast path; not yet time to release memory
-  scavenge_counter_ -= n;
-  if (scavenge_counter_ >= 0) return;  // Not yet time to scavenge
-
-  const double rate = FLAGS_tcmalloc_release_rate;
-  if (rate <= 1e-6) {
-    // Tiny release rate means that releasing is disabled.
-    scavenge_counter_ = kDefaultReleaseDelay;
-    return;
-  }
-
-  Length released_pages = ReleaseAtLeastNPages(1);
-
-  if (released_pages == 0) {
-    // Nothing to scavenge, delay for a while.
-    scavenge_counter_ = kDefaultReleaseDelay;
-  } else {
-    // Compute how long to wait until we return memory.
-    // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
-    // after releasing one page.
-    const double mult = 1000.0 / rate;
-    double wait = mult * static_cast<double>(released_pages);
-    if (wait > kMaxReleaseDelay) {
-      // Avoid overflow and bound to reasonable range.
-      wait = kMaxReleaseDelay;
-    }
-    scavenge_counter_ = static_cast<int64_t>(wait);
-  }
-}
-
-Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) {
-  Span* s = slist->normal.prev;
-  ASSERT(s->location == Span::ON_NORMAL_FREELIST);
-
-  if (DecommitSpan(s)) {
-    RemoveFromFreeList(s);
-    const Length n = s->length;
-    s->location = Span::ON_RETURNED_FREELIST;
-    MergeIntoFreeList(s);  // Coalesces if possible.
-    return n;
-  }
-
-  return 0;
-}
-
-Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
-  Length released_pages = 0;
-
-  // Round robin through the lists of free spans, releasing the last
-  // span in each list.  Stop after releasing at least num_pages
-  // or when there is nothing more to release.
-  while (released_pages < num_pages && stats_.free_bytes > 0) {
-    for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
-         i++, release_index_++) {
-      if (release_index_ > kMaxPages) release_index_ = 0;
-      SpanList* slist = (release_index_ == kMaxPages) ?
-          &large_ : &free_[release_index_];
-      if (!DLL_IsEmpty(&slist->normal)) {
-        Length released_len = ReleaseLastNormalSpan(slist);
-        // Some systems do not support release
-        if (released_len == 0) return released_pages;
-        released_pages += released_len;
-      }
-    }
-  }
-  return released_pages;
-}
-
-bool PageHeap::EnsureLimit(Length n, bool withRelease)
-{
-  Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift;
-  if (limit == 0) return true; //there is no limit
-
-  // We do not use stats_.system_bytes because it does not take
-  // MetaDataAllocs into account.
-  Length takenPages = TCMalloc_SystemTaken >> kPageShift;
-  //XXX takenPages may be slightly bigger than limit for two reasons:
-  //* MetaDataAllocs ignore the limit (it is not easy to handle
-  //  out of memory there)
-  //* sys_alloc may round allocation up to huge page size,
-  //  although smaller limit was ensured
-
-  ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift);
-  takenPages -= stats_.unmapped_bytes >> kPageShift;
-
-  if (takenPages + n > limit && withRelease) {
-    takenPages -= ReleaseAtLeastNPages(takenPages + n - limit);
-  }
-
-  return takenPages + n <= limit;
-}
-
-void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
-  // Associate span object with all interior pages as well
-  ASSERT(span->location == Span::IN_USE);
-  ASSERT(GetDescriptor(span->start) == span);
-  ASSERT(GetDescriptor(span->start+span->length-1) == span);
-  Event(span, 'C', sc);
-  span->sizeclass = sc;
-  for (Length i = 1; i < span->length-1; i++) {
-    pagemap_.set(span->start+i, span);
-  }
-}
-
-void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
-  for (int s = 0; s < kMaxPages; s++) {
-    result->normal_length[s] = DLL_Length(&free_[s].normal);
-    result->returned_length[s] = DLL_Length(&free_[s].returned);
-  }
-}
-
-void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
-  result->spans = 0;
-  result->normal_pages = 0;
-  result->returned_pages = 0;
-  for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
-    result->normal_pages += s->length;;
-    result->spans++;
-  }
-  for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
-    result->returned_pages += s->length;
-    result->spans++;
-  }
-}
-
-bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
-  Span* span = reinterpret_cast<Span*>(pagemap_.Next(start));
-  if (span == NULL) {
-    return false;
-  }
-  r->address = span->start << kPageShift;
-  r->length = span->length << kPageShift;
-  r->fraction = 0;
-  switch (span->location) {
-    case Span::IN_USE:
-      r->type = base::MallocRange::INUSE;
-      r->fraction = 1;
-      if (span->sizeclass > 0) {
-        // Only some of the objects in this span may be in use.
-        const size_t osize = Static::sizemap()->class_to_size(span->sizeclass);
-        r->fraction = (1.0 * osize * span->refcount) / r->length;
-      }
-      break;
-    case Span::ON_NORMAL_FREELIST:
-      r->type = base::MallocRange::FREE;
-      break;
-    case Span::ON_RETURNED_FREELIST:
-      r->type = base::MallocRange::UNMAPPED;
-      break;
-    default:
-      r->type = base::MallocRange::UNKNOWN;
-      break;
-  }
-  return true;
-}
-
-static void RecordGrowth(size_t growth) {
-  StackTrace* t = Static::stacktrace_allocator()->New();
-  t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
-  t->size = growth;
-  t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks());
-  Static::set_growth_stacks(t);
-}
-
-bool PageHeap::GrowHeap(Length n) {
-  ASSERT(kMaxPages >= kMinSystemAlloc);
-  if (n > kMaxValidPages) return false;
-  Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
-  size_t actual_size;
-  void* ptr = NULL;
-  if (EnsureLimit(ask)) {
-      ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
-  }
-  if (ptr == NULL) {
-    if (n < ask) {
-      // Try growing just "n" pages
-      ask = n;
-      if (EnsureLimit(ask)) {
-        ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
-      }
-    }
-    if (ptr == NULL) return false;
-  }
-  ask = actual_size >> kPageShift;
-  RecordGrowth(ask << kPageShift);
-
-  uint64_t old_system_bytes = stats_.system_bytes;
-  stats_.system_bytes += (ask << kPageShift);
-  stats_.committed_bytes += (ask << kPageShift);
-  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  ASSERT(p > 0);
-
-  // If we have already a lot of pages allocated, just pre allocate a bunch of
-  // memory for the page map. This prevents fragmentation by pagemap metadata
-  // when a program keeps allocating and freeing large blocks.
-
-  if (old_system_bytes < kPageMapBigAllocationThreshold
-      && stats_.system_bytes >= kPageMapBigAllocationThreshold) {
-    pagemap_.PreallocateMoreMemory();
-  }
-
-  // Make sure pagemap_ has entries for all of the new pages.
-  // Plus ensure one before and one after so coalescing code
-  // does not need bounds-checking.
-  if (pagemap_.Ensure(p-1, ask+2)) {
-    // Pretend the new area is allocated and then Delete() it to cause
-    // any necessary coalescing to occur.
-    Span* span = NewSpan(p, ask);
-    RecordSpan(span);
-    Delete(span);
-    ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
-    ASSERT(Check());
-    return true;
-  } else {
-    // We could not allocate memory within "pagemap_"
-    // TODO: Once we can return memory to the system, return the new span
-    return false;
-  }
-}
-
-bool PageHeap::Check() {
-  ASSERT(free_[0].normal.next == &free_[0].normal);
-  ASSERT(free_[0].returned.next == &free_[0].returned);
-  return true;
-}
-
-bool PageHeap::CheckExpensive() {
-  bool result = Check();
-  CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST);
-  CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST);
-  for (Length s = 1; s < kMaxPages; s++) {
-    CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST);
-    CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST);
-  }
-  return result;
-}
-
-bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages,
-                         int freelist) {
-  for (Span* s = list->next; s != list; s = s->next) {
-    CHECK_CONDITION(s->location == freelist);  // NORMAL or RETURNED
-    CHECK_CONDITION(s->length >= min_pages);
-    CHECK_CONDITION(s->length <= max_pages);
-    CHECK_CONDITION(GetDescriptor(s->start) == s);
-    CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
-  }
-  return true;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/page_heap.h b/contrib/libtcmalloc/src/page_heap.h
deleted file mode 100644
index 89fab81da69..00000000000
--- a/contrib/libtcmalloc/src/page_heap.h
+++ /dev/null
@@ -1,316 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#ifndef TCMALLOC_PAGE_HEAP_H_
-#define TCMALLOC_PAGE_HEAP_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uint64_t, int64_t, uint16_t
-#endif
-#include <gperftools/malloc_extension.h>
-#include "base/basictypes.h"
-#include "common.h"
-#include "packed-cache-inl.h"
-#include "pagemap.h"
-#include "span.h"
-
-// We need to dllexport PageHeap just for the unittest.  MSVC complains
-// that we don't dllexport the PageHeap members, but we don't need to
-// test those, so I just suppress this warning.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4251)
-#endif
-
-// This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
-// you're porting to a system where you really can't get a stacktrace.
-// Because we control the definition of GetStackTrace, all clients of
-// GetStackTrace should #include us rather than stacktrace.h.
-#ifdef NO_TCMALLOC_SAMPLES
-  // We use #define so code compiles even if you #include stacktrace.h somehow.
-# define GetStackTrace(stack, depth, skip)  (0)
-#else
-# include <gperftools/stacktrace.h>
-#endif
-
-namespace base {
-struct MallocRange;
-}
-
-namespace tcmalloc {
-
-// -------------------------------------------------------------------------
-// Map from page-id to per-page data
-// -------------------------------------------------------------------------
-
-// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
-// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
-// because sometimes the sizeclass is all the information we need.
-
-// Selector class -- general selector uses 3-level map
-template <int BITS> class MapSelector {
- public:
-  typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
-  typedef PackedCache<BITS-kPageShift, uint64_t> CacheType;
-};
-
-// A two-level map for 32-bit machines
-template <> class MapSelector<32> {
- public:
-  typedef TCMalloc_PageMap2<32-kPageShift> Type;
-  typedef PackedCache<32-kPageShift, uint16_t> CacheType;
-};
-
-// -------------------------------------------------------------------------
-// Page-level allocator
-//  * Eager coalescing
-//
-// Heap for page-level allocation.  We allow allocating and freeing a
-// contiguous runs of pages (called a "span").
-// -------------------------------------------------------------------------
-
-class PERFTOOLS_DLL_DECL PageHeap {
- public:
-  PageHeap();
-
-  // Allocate a run of "n" pages.  Returns zero if out of memory.
-  // Caller should not pass "n == 0" -- instead, n should have
-  // been rounded up already.
-  Span* New(Length n);
-
-  // Delete the span "[p, p+n-1]".
-  // REQUIRES: span was returned by earlier call to New() and
-  //           has not yet been deleted.
-  void Delete(Span* span);
-
-  // Mark an allocated span as being used for small objects of the
-  // specified size-class.
-  // REQUIRES: span was returned by an earlier call to New()
-  //           and has not yet been deleted.
-  void RegisterSizeClass(Span* span, size_t sc);
-
-  // Split an allocated span into two spans: one of length "n" pages
-  // followed by another span of length "span->length - n" pages.
-  // Modifies "*span" to point to the first span of length "n" pages.
-  // Returns a pointer to the second span.
-  //
-  // REQUIRES: "0 < n < span->length"
-  // REQUIRES: span->location == IN_USE
-  // REQUIRES: span->sizeclass == 0
-  Span* Split(Span* span, Length n);
-
-  // Return the descriptor for the specified page.  Returns NULL if
-  // this PageID was not allocated previously.
-  inline Span* GetDescriptor(PageID p) const {
-    return reinterpret_cast<Span*>(pagemap_.get(p));
-  }
-
-  // If this page heap is managing a range with starting page # >= start,
-  // store info about the range in *r and return true.  Else return false.
-  bool GetNextRange(PageID start, base::MallocRange* r);
-
-  // Page heap statistics
-  struct Stats {
-    Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0) {}
-    uint64_t system_bytes;    // Total bytes allocated from system
-    uint64_t free_bytes;      // Total bytes on normal freelists
-    uint64_t unmapped_bytes;  // Total bytes on returned freelists
-    uint64_t committed_bytes;  // Bytes committed, always <= system_bytes_.
-
-  };
-  inline Stats stats() const { return stats_; }
-
-  struct SmallSpanStats {
-    // For each free list of small spans, the length (in spans) of the
-    // normal and returned free lists for that size.
-    int64 normal_length[kMaxPages];
-    int64 returned_length[kMaxPages];
-  };
-  void GetSmallSpanStats(SmallSpanStats* result);
-
-  // Stats for free large spans (i.e., spans with more than kMaxPages pages).
-  struct LargeSpanStats {
-    int64 spans;           // Number of such spans
-    int64 normal_pages;    // Combined page length of normal large spans
-    int64 returned_pages;  // Combined page length of unmapped spans
-  };
-  void GetLargeSpanStats(LargeSpanStats* result);
-
-  bool Check();
-  // Like Check() but does some more comprehensive checking.
-  bool CheckExpensive();
-  bool CheckList(Span* list, Length min_pages, Length max_pages,
-                 int freelist);  // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST
-
-  // Try to release at least num_pages for reuse by the OS.  Returns
-  // the actual number of pages released, which may be less than
-  // num_pages if there weren't enough pages to release. The result
-  // may also be larger than num_pages since page_heap might decide to
-  // release one large range instead of fragmenting it into two
-  // smaller released and unreleased ranges.
-  Length ReleaseAtLeastNPages(Length num_pages);
-
-  // Return 0 if we have no information, or else the correct sizeclass for p.
-  // Reads and writes to pagemap_cache_ do not require locking.
-  // The entries are 64 bits on 64-bit hardware and 16 bits on
-  // 32-bit hardware, and we don't mind raciness as long as each read of
-  // an entry yields a valid entry, not a partially updated entry.
-  size_t GetSizeClassIfCached(PageID p) const {
-    return pagemap_cache_.GetOrDefault(p, 0);
-  }
-  void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
-
-  bool GetAggressiveDecommit(void) {return aggressive_decommit_;}
-  void SetAggressiveDecommit(bool aggressive_decommit) {
-    aggressive_decommit_ = aggressive_decommit;
-  }
-
- private:
-  // Allocates a big block of memory for the pagemap once we reach more than
-  // 128MB
-  static const size_t kPageMapBigAllocationThreshold = 128 << 20;
-
-  // Minimum number of pages to fetch from system at a time.  Must be
-  // significantly bigger than kBlockSize to amortize system-call
-  // overhead, and also to reduce external fragementation.  Also, we
-  // should keep this value big because various incarnations of Linux
-  // have small limits on the number of mmap() regions per
-  // address-space.
-  // REQUIRED: kMinSystemAlloc <= kMaxPages;
-  static const int kMinSystemAlloc = kMaxPages;
-
-  // Never delay scavenging for more than the following number of
-  // deallocated pages.  With 4K pages, this comes to 4GB of
-  // deallocation.
-  static const int kMaxReleaseDelay = 1 << 20;
-
-  // If there is nothing to release, wait for so many pages before
-  // scavenging again.  With 4K pages, this comes to 1GB of memory.
-  static const int kDefaultReleaseDelay = 1 << 18;
-
-  // Pick the appropriate map and cache types based on pointer size
-  typedef MapSelector<kAddressBits>::Type PageMap;
-  typedef MapSelector<kAddressBits>::CacheType PageMapCache;
-  PageMap pagemap_;
-  mutable PageMapCache pagemap_cache_;
-
-  // We segregate spans of a given size into two circular linked
-  // lists: one for normal spans, and one for spans whose memory
-  // has been returned to the system.
-  struct SpanList {
-    Span        normal;
-    Span        returned;
-  };
-
-  // List of free spans of length >= kMaxPages
-  SpanList large_;
-
-  // Array mapping from span length to a doubly linked list of free spans
-  SpanList free_[kMaxPages];
-
-  // Statistics on system, free, and unmapped bytes
-  Stats stats_;
-
-  Span* SearchFreeAndLargeLists(Length n);
-
-  bool GrowHeap(Length n);
-
-  // REQUIRES: span->length >= n
-  // REQUIRES: span->location != IN_USE
-  // Remove span from its free list, and move any leftover part of
-  // span into appropriate free lists.  Also update "span" to have
-  // length exactly "n" and mark it as non-free so it can be returned
-  // to the client.  After all that, decrease free_pages_ by n and
-  // return span.
-  Span* Carve(Span* span, Length n);
-
-  void RecordSpan(Span* span) {
-    pagemap_.set(span->start, span);
-    if (span->length > 1) {
-      pagemap_.set(span->start + span->length - 1, span);
-    }
-  }
-
-  // Allocate a large span of length == n.  If successful, returns a
-  // span of exactly the specified length.  Else, returns NULL.
-  Span* AllocLarge(Length n);
-
-  // Coalesce span with neighboring spans if possible, prepend to
-  // appropriate free list, and adjust stats.
-  void MergeIntoFreeList(Span* span);
-
-  // Commit the span.
-  void CommitSpan(Span* span);
-
-  // Decommit the span.
-  bool DecommitSpan(Span* span);
-
-  // Prepends span to appropriate free list, and adjusts stats.
-  void PrependToFreeList(Span* span);
-
-  // Removes span from its free list, and adjust stats.
-  void RemoveFromFreeList(Span* span);
-
-  // Incrementally release some memory to the system.
-  // IncrementalScavenge(n) is called whenever n pages are freed.
-  void IncrementalScavenge(Length n);
-
-  // Release the last span on the normal portion of this list.
-  // Return the length of that span or zero if release failed.
-  Length ReleaseLastNormalSpan(SpanList* slist);
-
-  // Checks if we are allowed to take more memory from the system.
-  // If limit is reached and allowRelease is true, tries to release
-  // some unused spans.
-  bool EnsureLimit(Length n, bool allowRelease = true);
-
-  bool MayMergeSpans(Span *span, Span *other);
-
-  // Number of pages to deallocate before doing more scavenging
-  int64_t scavenge_counter_;
-
-  // Index of last free list where we released memory to the OS.
-  int release_index_;
-
-  bool aggressive_decommit_;
-};
-
-}  // namespace tcmalloc
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif  // TCMALLOC_PAGE_HEAP_H_
diff --git a/contrib/libtcmalloc/src/page_heap_allocator.h b/contrib/libtcmalloc/src/page_heap_allocator.h
deleted file mode 100644
index 892d1c1abe3..00000000000
--- a/contrib/libtcmalloc/src/page_heap_allocator.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
-#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
-
-#include <stddef.h>                     // for NULL, size_t
-
-#include "common.h"            // for MetaDataAlloc
-#include "internal_logging.h"  // for ASSERT
-
-namespace tcmalloc {
-
-// Simple allocator for objects of a specified type.  External locking
-// is required before accessing one of these objects.
-template <class T>
-class PageHeapAllocator {
- public:
-  // We use an explicit Init function because these variables are statically
-  // allocated and their constructors might not have run by the time some
-  // other static variable tries to allocate memory.
-  void Init() {
-    ASSERT(sizeof(T) <= kAllocIncrement);
-    inuse_ = 0;
-    free_area_ = NULL;
-    free_avail_ = 0;
-    free_list_ = NULL;
-    // Reserve some space at the beginning to avoid fragmentation.
-    Delete(New());
-  }
-
-  T* New() {
-    // Consult free list
-    void* result;
-    if (free_list_ != NULL) {
-      result = free_list_;
-      free_list_ = *(reinterpret_cast<void**>(result));
-    } else {
-      if (free_avail_ < sizeof(T)) {
-        // Need more room. We assume that MetaDataAlloc returns
-        // suitably aligned memory.
-        free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
-        if (free_area_ == NULL) {
-          Log(kCrash, __FILE__, __LINE__,
-              "FATAL ERROR: Out of memory trying to allocate internal "
-              "tcmalloc data (bytes, object-size)",
-              kAllocIncrement, sizeof(T));
-        }
-        free_avail_ = kAllocIncrement;
-      }
-      result = free_area_;
-      free_area_ += sizeof(T);
-      free_avail_ -= sizeof(T);
-    }
-    inuse_++;
-    return reinterpret_cast<T*>(result);
-  }
-
-  void Delete(T* p) {
-    *(reinterpret_cast<void**>(p)) = free_list_;
-    free_list_ = p;
-    inuse_--;
-  }
-
-  int inuse() const { return inuse_; }
-
- private:
-  // How much to allocate from system at a time
-  static const int kAllocIncrement = 128 << 10;
-
-  // Free area from which to carve new objects
-  char* free_area_;
-  size_t free_avail_;
-
-  // Free list of already carved objects
-  void* free_list_;
-
-  // Number of allocated but unfreed objects
-  int inuse_;
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
diff --git a/contrib/libtcmalloc/src/pagemap.h b/contrib/libtcmalloc/src/pagemap.h
deleted file mode 100644
index dd9442313af..00000000000
--- a/contrib/libtcmalloc/src/pagemap.h
+++ /dev/null
@@ -1,324 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// A data structure used by the caching malloc.  It maps from page# to
-// a pointer that contains info about that page.  We use two
-// representations: one for 32-bit addresses, and another for 64 bit
-// addresses.  Both representations provide the same interface.  The
-// first representation is implemented as a flat array, the seconds as
-// a three-level radix tree that strips away approximately 1/3rd of
-// the bits every time.
-//
-// The BITS parameter should be the number of bits required to hold
-// a page number.  E.g., with 32 bit pointers and 4K pages (i.e.,
-// page offset fits in lower 12 bits), BITS == 20.
-
-#ifndef TCMALLOC_PAGEMAP_H_
-#define TCMALLOC_PAGEMAP_H_
-
-#include "config.h"
-
-#include <stddef.h>                     // for NULL, size_t
-#include <string.h>                     // for memset
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-#include "internal_logging.h"  // for ASSERT
-
-// Single-level array
-template <int BITS>
-class TCMalloc_PageMap1 {
- private:
-  static const int LENGTH = 1 << BITS;
-
-  void** array_;
-
- public:
-  typedef uintptr_t Number;
-
-  explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) {
-    array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS));
-    memset(array_, 0, sizeof(void*) << BITS);
-  }
-
-  // Ensure that the map contains initialized entries "x .. x+n-1".
-  // Returns true if successful, false if we could not allocate memory.
-  bool Ensure(Number x, size_t n) {
-    // Nothing to do since flat array was allocated at start.  All
-    // that's left is to check for overflow (that is, we don't want to
-    // ensure a number y where array_[y] would be an out-of-bounds
-    // access).
-    return n <= LENGTH - x;   // an overflow-free way to do "x + n <= LENGTH"
-  }
-
-  void PreallocateMoreMemory() {}
-
-  // Return the current value for KEY.  Returns NULL if not yet set,
-  // or if k is out of range.
-  void* get(Number k) const {
-    if ((k >> BITS) > 0) {
-      return NULL;
-    }
-    return array_[k];
-  }
-
-  // REQUIRES "k" is in range "[0,2^BITS-1]".
-  // REQUIRES "k" has been ensured before.
-  //
-  // Sets the value 'v' for key 'k'.
-  void set(Number k, void* v) {
-    array_[k] = v;
-  }
-
-  // Return the first non-NULL pointer found in this map for
-  // a page number >= k.  Returns NULL if no such number is found.
-  void* Next(Number k) const {
-    while (k < (1 << BITS)) {
-      if (array_[k] != NULL) return array_[k];
-      k++;
-    }
-    return NULL;
-  }
-};
-
-// Two-level radix tree
-template <int BITS>
-class TCMalloc_PageMap2 {
- private:
-  // Put 32 entries in the root and (2^BITS)/32 entries in each leaf.
-  static const int ROOT_BITS = 5;
-  static const int ROOT_LENGTH = 1 << ROOT_BITS;
-
-  static const int LEAF_BITS = BITS - ROOT_BITS;
-  static const int LEAF_LENGTH = 1 << LEAF_BITS;
-
-  // Leaf node
-  struct Leaf {
-    void* values[LEAF_LENGTH];
-  };
-
-  Leaf* root_[ROOT_LENGTH];             // Pointers to 32 child nodes
-  void* (*allocator_)(size_t);          // Memory allocator
-
- public:
-  typedef uintptr_t Number;
-
-  explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) {
-    allocator_ = allocator;
-    memset(root_, 0, sizeof(root_));
-  }
-
-  void* get(Number k) const {
-    const Number i1 = k >> LEAF_BITS;
-    const Number i2 = k & (LEAF_LENGTH-1);
-    if ((k >> BITS) > 0 || root_[i1] == NULL) {
-      return NULL;
-    }
-    return root_[i1]->values[i2];
-  }
-
-  void set(Number k, void* v) {
-    const Number i1 = k >> LEAF_BITS;
-    const Number i2 = k & (LEAF_LENGTH-1);
-    ASSERT(i1 < ROOT_LENGTH);
-    root_[i1]->values[i2] = v;
-  }
-
-  bool Ensure(Number start, size_t n) {
-    for (Number key = start; key <= start + n - 1; ) {
-      const Number i1 = key >> LEAF_BITS;
-
-      // Check for overflow
-      if (i1 >= ROOT_LENGTH)
-        return false;
-
-      // Make 2nd level node if necessary
-      if (root_[i1] == NULL) {
-        Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf)));
-        if (leaf == NULL) return false;
-        memset(leaf, 0, sizeof(*leaf));
-        root_[i1] = leaf;
-      }
-
-      // Advance key past whatever is covered by this leaf node
-      key = ((key >> LEAF_BITS) + 1) << LEAF_BITS;
-    }
-    return true;
-  }
-
-  void PreallocateMoreMemory() {
-    // Allocate enough to keep track of all possible pages
-    Ensure(0, 1 << BITS);
-  }
-
-  void* Next(Number k) const {
-    while (k < (1 << BITS)) {
-      const Number i1 = k >> LEAF_BITS;
-      Leaf* leaf = root_[i1];
-      if (leaf != NULL) {
-        // Scan forward in leaf
-        for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) {
-          if (leaf->values[i2] != NULL) {
-            return leaf->values[i2];
-          }
-        }
-      }
-      // Skip to next top-level entry
-      k = (i1 + 1) << LEAF_BITS;
-    }
-    return NULL;
-  }
-};
-
-// Three-level radix tree
-template <int BITS>
-class TCMalloc_PageMap3 {
- private:
-  // How many bits should we consume at each interior level
-  static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up
-  static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS;
-
-  // How many bits should we consume at leaf level
-  static const int LEAF_BITS = BITS - 2*INTERIOR_BITS;
-  static const int LEAF_LENGTH = 1 << LEAF_BITS;
-
-  // Interior node
-  struct Node {
-    Node* ptrs[INTERIOR_LENGTH];
-  };
-
-  // Leaf node
-  struct Leaf {
-    void* values[LEAF_LENGTH];
-  };
-
-  Node* root_;                          // Root of radix tree
-  void* (*allocator_)(size_t);          // Memory allocator
-
-  Node* NewNode() {
-    Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node)));
-    if (result != NULL) {
-      memset(result, 0, sizeof(*result));
-    }
-    return result;
-  }
-
- public:
-  typedef uintptr_t Number;
-
-  explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) {
-    allocator_ = allocator;
-    root_ = NewNode();
-  }
-
-  void* get(Number k) const {
-    const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
-    const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
-    const Number i3 = k & (LEAF_LENGTH-1);
-    if ((k >> BITS) > 0 ||
-        root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) {
-      return NULL;
-    }
-    return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3];
-  }
-
-  void set(Number k, void* v) {
-    ASSERT(k >> BITS == 0);
-    const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
-    const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
-    const Number i3 = k & (LEAF_LENGTH-1);
-    reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v;
-  }
-
-  bool Ensure(Number start, size_t n) {
-    for (Number key = start; key <= start + n - 1; ) {
-      const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS);
-      const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1);
-
-      // Check for overflow
-      if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH)
-        return false;
-
-      // Make 2nd level node if necessary
-      if (root_->ptrs[i1] == NULL) {
-        Node* n = NewNode();
-        if (n == NULL) return false;
-        root_->ptrs[i1] = n;
-      }
-
-      // Make leaf node if necessary
-      if (root_->ptrs[i1]->ptrs[i2] == NULL) {
-        Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf)));
-        if (leaf == NULL) return false;
-        memset(leaf, 0, sizeof(*leaf));
-        root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf);
-      }
-
-      // Advance key past whatever is covered by this leaf node
-      key = ((key >> LEAF_BITS) + 1) << LEAF_BITS;
-    }
-    return true;
-  }
-
-  void PreallocateMoreMemory() {
-  }
-
-  void* Next(Number k) const {
-    while (k < (Number(1) << BITS)) {
-      const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
-      const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
-      if (root_->ptrs[i1] == NULL) {
-        // Advance to next top-level entry
-        k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS);
-      } else {
-        Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]);
-        if (leaf != NULL) {
-          for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) {
-            if (leaf->values[i3] != NULL) {
-              return leaf->values[i3];
-            }
-          }
-        }
-        // Advance to next interior entry
-        k = ((k >> LEAF_BITS) + 1) << LEAF_BITS;
-      }
-    }
-    return NULL;
-  }
-};
-
-#endif  // TCMALLOC_PAGEMAP_H_
diff --git a/contrib/libtcmalloc/src/raw_printer.cc b/contrib/libtcmalloc/src/raw_printer.cc
deleted file mode 100644
index 3cf028eeae0..00000000000
--- a/contrib/libtcmalloc/src/raw_printer.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: sanjay@google.com (Sanjay Ghemawat)
-
-#include <config.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include "raw_printer.h"
-#include "base/logging.h"
-
-namespace base {
-
-RawPrinter::RawPrinter(char* buf, int length)
-    : base_(buf),
-      ptr_(buf),
-      limit_(buf + length - 1) {
-  RAW_DCHECK(length > 0, "");
-  *ptr_ = '\0';
-  *limit_ = '\0';
-}
-
-void RawPrinter::Printf(const char* format, ...) {
-  if (limit_ > ptr_) {
-    va_list ap;
-    va_start(ap, format);
-    int avail = limit_ - ptr_;
-    // We pass avail+1 to vsnprintf() since that routine needs room
-    // to store the trailing \0.
-    const int r = perftools_vsnprintf(ptr_, avail+1, format, ap);
-    va_end(ap);
-    if (r < 0) {
-      // Perhaps an old glibc that returns -1 on truncation?
-      ptr_ = limit_;
-    } else if (r > avail) {
-      // Truncation
-      ptr_ = limit_;
-    } else {
-      ptr_ += r;
-    }
-  }
-}
-
-}
diff --git a/contrib/libtcmalloc/src/raw_printer.h b/contrib/libtcmalloc/src/raw_printer.h
deleted file mode 100644
index 9288bb5eeaa..00000000000
--- a/contrib/libtcmalloc/src/raw_printer.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// A printf() wrapper that writes into a fixed length buffer.
-// Useful in low-level code that does not want to use allocating
-// routines like StringPrintf().
-//
-// The implementation currently uses vsnprintf().  This seems to
-// be fine for use in many low-level contexts, but we may need to
-// rethink this decision if we hit a problem with it calling
-// down into malloc() etc.
-
-#ifndef BASE_RAW_PRINTER_H_
-#define BASE_RAW_PRINTER_H_
-
-#include <config.h>
-#include "base/basictypes.h"
-
-namespace base {
-
-class RawPrinter {
- public:
-  // REQUIRES: "length > 0"
-  // Will printf any data added to this into "buf[0,length-1]" and
-  // will arrange to always keep buf[] null-terminated.
-  RawPrinter(char* buf, int length);
-
-  // Return the number of bytes that have been appended to the string
-  // so far.  Does not count any bytes that were dropped due to overflow.
-  int length() const { return (ptr_ - base_); }
-
-  // Return the number of bytes that can be added to this.
-  int space_left() const { return (limit_ - ptr_); }
-
-  // Format the supplied arguments according to the "format" string
-  // and append to this.  Will silently truncate the output if it does
-  // not fit.
-  void Printf(const char* format, ...)
-#ifdef HAVE___ATTRIBUTE__
-  __attribute__ ((__format__ (__printf__, 2, 3)))
-#endif
-;
-
- private:
-  // We can write into [ptr_ .. limit_-1].
-  // *limit_ is also writable, but reserved for a terminating \0
-  // in case we overflow.
-  //
-  // Invariants: *ptr_ == \0
-  // Invariants: *limit_ == \0
-  char* base_;          // Initial pointer
-  char* ptr_;           // Where should we write next
-  char* limit_;         // One past last non-\0 char we can write
-
-  DISALLOW_COPY_AND_ASSIGN(RawPrinter);
-};
-
-}
-
-#endif  // BASE_RAW_PRINTER_H_
diff --git a/contrib/libtcmalloc/src/sampler.cc b/contrib/libtcmalloc/src/sampler.cc
deleted file mode 100644
index cc711123340..00000000000
--- a/contrib/libtcmalloc/src/sampler.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Daniel Ford
-
-#include "sampler.h"
-
-#include <algorithm>  // For min()
-#include <math.h>
-#include "base/commandlineflags.h"
-
-using std::min;
-
-// The approximate gap in bytes between sampling actions.
-// I.e., we take one sample approximately once every
-// tcmalloc_sample_parameter bytes of allocation
-// i.e. about once every 512KB if value is 1<<19.
-#ifdef NO_TCMALLOC_SAMPLES
-DEFINE_int64(tcmalloc_sample_parameter, 0,
-             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
-#else
-DEFINE_int64(tcmalloc_sample_parameter,
-             EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0),
-             "The approximate gap in bytes between sampling actions. "
-             "This must be between 1 and 2^58.");
-#endif
-
-namespace tcmalloc {
-
-// Statics for Sampler
-double Sampler::log_table_[1<<kFastlogNumBits];
-
-// Populate the lookup table for FastLog2.
-// This approximates the log2 curve with a step function.
-// Steps have height equal to log2 of the mid-point of the step.
-void Sampler::PopulateFastLog2Table() {
-  for (int i = 0; i < (1<<kFastlogNumBits); i++) {
-    log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits))
-                     / log(2.0));
-  }
-}
-
-int Sampler::GetSamplePeriod() {
-  return FLAGS_tcmalloc_sample_parameter;
-}
-
-// Run this before using your sampler
-void Sampler::Init(uint32_t seed) {
-  // Initialize PRNG
-  if (seed != 0) {
-    rnd_ = seed;
-  } else {
-    rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
-    if (rnd_ == 0) {
-      rnd_ = 1;
-    }
-  }
-  // Step it forward 20 times for good measure
-  for (int i = 0; i < 20; i++) {
-    rnd_ = NextRandom(rnd_);
-  }
-  // Initialize counter
-  bytes_until_sample_ = PickNextSamplingPoint();
-}
-
-// Initialize the Statics for the Sampler class
-void Sampler::InitStatics() {
-  PopulateFastLog2Table();
-}
-
-// Generates a geometric variable with the specified mean (512K by default).
-// This is done by generating a random number between 0 and 1 and applying
-// the inverse cumulative distribution function for an exponential.
-// Specifically: Let m be the inverse of the sample period, then
-// the probability distribution function is m*exp(-mx) so the CDF is
-// p = 1 - exp(-mx), so
-// q = 1 - p = exp(-mx)
-// log_e(q) = -mx
-// -log_e(q)/m = x
-// log_2(q) * (-log_e(2) * 1/m) = x
-// In the code, q is actually in the range 1 to 2**26, hence the -26 below
-size_t Sampler::PickNextSamplingPoint() {
-  rnd_ = NextRandom(rnd_);
-  // Take the top 26 bits as the random number
-  // (This plus the 1<<58 sampling bound give a max possible step of
-  // 5194297183973780480 bytes.)
-  const uint64_t prng_mod_power = 48;  // Number of bits in prng
-  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
-  // under piii debug for some binaries.
-  double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0;
-  // Put the computed p-value through the CDF of a geometric.
-  // For faster performance (save ~1/20th exec time), replace
-  // min(0.0, FastLog2(q) - 26)  by  (Fastlog2(q) - 26.000705)
-  // The value 26.000705 is used rather than 26 to compensate
-  // for inaccuracies in FastLog2 which otherwise result in a
-  // negative answer.
-  return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0)
-                             * FLAGS_tcmalloc_sample_parameter) + 1);
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/sampler.h b/contrib/libtcmalloc/src/sampler.h
deleted file mode 100644
index eb316d7493d..00000000000
--- a/contrib/libtcmalloc/src/sampler.h
+++ /dev/null
@@ -1,180 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// All Rights Reserved.
-//
-// Author: Daniel Ford
-
-#ifndef TCMALLOC_SAMPLER_H_
-#define TCMALLOC_SAMPLER_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
-#endif
-#include <string.h>                     // for memcpy
-#include "base/basictypes.h"  // for ASSERT
-#include "internal_logging.h"  // for ASSERT
-
-namespace tcmalloc {
-
-//-------------------------------------------------------------------
-// Sampler to decide when to create a sample trace for an allocation
-// Not thread safe: Each thread should have it's own sampler object.
-// Caller must use external synchronization if used
-// from multiple threads.
-//
-// With 512K average sample step (the default):
-//  the probability of sampling a 4K allocation is about 0.00778
-//  the probability of sampling a 1MB allocation is about 0.865
-//  the probability of sampling a 1GB allocation is about 1.00000
-// In general, the probablity of sampling is an allocation of size X
-// given a flag value of Y (default 1M) is:
-//  1 - e^(-X/Y)
-//
-// With 128K average sample step:
-//  the probability of sampling a 1MB allocation is about 0.99966
-//  the probability of sampling a 1GB allocation is about 1.0
-//  (about 1 - 2**(-26))
-// With 1M average sample step:
-//  the probability of sampling a 4K allocation is about 0.00390
-//  the probability of sampling a 1MB allocation is about 0.632
-//  the probability of sampling a 1GB allocation is about 1.0
-//
-// The sampler works by representing memory as a long stream from
-// which allocations are taken. Some of the bytes in this stream are
-// marked and if an allocation includes a marked byte then it is
-// sampled. Bytes are marked according to a Poisson point process
-// with each byte being marked independently with probability
-// p = 1/tcmalloc_sample_parameter.  This makes the probability
-// of sampling an allocation of X bytes equal to the CDF of
-// a geometric with mean tcmalloc_sample_parameter. (ie. the
-// probability that at least one byte in the range is marked). This
-// is accurately given by the CDF of the corresponding exponential
-// distribution : 1 - e^(X/tcmalloc_sample_parameter_)
-// Independence of the byte marking ensures independence of
-// the sampling of each allocation.
-//
-// This scheme is implemented by noting that, starting from any
-// fixed place, the number of bytes until the next marked byte
-// is geometrically distributed. This number is recorded as
-// bytes_until_sample_.  Every allocation subtracts from this
-// number until it is less than 0. When this happens the current
-// allocation is sampled.
-//
-// When an allocation occurs, bytes_until_sample_ is reset to
-// a new independtly sampled geometric number of bytes. The
-// memoryless property of the point process means that this may
-// be taken as the number of bytes after the end of the current
-// allocation until the next marked byte. This ensures that
-// very large allocations which would intersect many marked bytes
-// only result in a single call to PickNextSamplingPoint.
-//-------------------------------------------------------------------
-
-class PERFTOOLS_DLL_DECL Sampler {
- public:
-  // Initialize this sampler.
-  // Passing a seed of 0 gives a non-deterministic
-  // seed value given by casting the object ("this")
-  void Init(uint32_t seed);
-  void Cleanup();
-
-  // Record allocation of "k" bytes.  Return true iff allocation
-  // should be sampled
-  bool SampleAllocation(size_t k);
-
-  // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
-  size_t PickNextSamplingPoint();
-
-  // Initialize the statics for the Sampler class
-  static void InitStatics();
-
-  // Returns the current sample period
-  int GetSamplePeriod();
-
-  // The following are public for the purposes of testing
-  static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
-  static double FastLog2(const double & d);  // Computes Log2(x) quickly
-  static void PopulateFastLog2Table();  // Populate the lookup table
-
- private:
-  size_t        bytes_until_sample_;    // Bytes until we sample next
-  uint64_t      rnd_;                   // Cheap random number generator
-
-  // Statics for the fast log
-  // Note that this code may not depend on anything in //util
-  // hence the duplication of functionality here
-  static const int kFastlogNumBits = 10;
-  static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
-  static double log_table_[1<<kFastlogNumBits];  // Constant
-};
-
-inline bool Sampler::SampleAllocation(size_t k) {
-  if (bytes_until_sample_ < k) {
-    bytes_until_sample_ = PickNextSamplingPoint();
-    return true;
-  } else {
-    bytes_until_sample_ -= k;
-    return false;
-  }
-}
-
-// Inline functions which are public for testing purposes
-
-// Returns the next prng value.
-// pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
-// This is the lrand64 generator.
-inline uint64_t Sampler::NextRandom(uint64_t rnd) {
-  const uint64_t prng_mult = 0x5DEECE66DLL;
-  const uint64_t prng_add = 0xB;
-  const uint64_t prng_mod_power = 48;
-  const uint64_t prng_mod_mask =
-                ~((~static_cast<uint64_t>(0)) << prng_mod_power);
-  return (prng_mult * rnd + prng_add) & prng_mod_mask;
-}
-
-// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
-// This mimics the VeryFastLog2 code in those files
-inline double Sampler::FastLog2(const double & d) {
-  ASSERT(d>0);
-  COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
-  uint64_t x;
-  memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
-  const uint32_t x_high = x >> 32;
-  const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
-  const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
-  return exponent + log_table_[y];
-}
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_SAMPLER_H_
diff --git a/contrib/libtcmalloc/src/span.cc b/contrib/libtcmalloc/src/span.cc
deleted file mode 100644
index 5f7ae436086..00000000000
--- a/contrib/libtcmalloc/src/span.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#include "config.h"
-#include "span.h"
-
-#include <string.h>                     // for NULL, memset
-
-#include "internal_logging.h"  // for ASSERT
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "static_vars.h"       // for Static
-
-namespace tcmalloc {
-
-#ifdef SPAN_HISTORY
-void Event(Span* span, char op, int v = 0) {
-  span->history[span->nexthistory] = op;
-  span->value[span->nexthistory] = v;
-  span->nexthistory++;
-  if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
-}
-#endif
-
-Span* NewSpan(PageID p, Length len) {
-  Span* result = Static::span_allocator()->New();
-  memset(result, 0, sizeof(*result));
-  result->start = p;
-  result->length = len;
-#ifdef SPAN_HISTORY
-  result->nexthistory = 0;
-#endif
-  return result;
-}
-
-void DeleteSpan(Span* span) {
-#ifndef NDEBUG
-  // In debug mode, trash the contents of deleted Spans
-  memset(span, 0x3f, sizeof(*span));
-#endif
-  Static::span_allocator()->Delete(span);
-}
-
-void DLL_Init(Span* list) {
-  list->next = list;
-  list->prev = list;
-}
-
-void DLL_Remove(Span* span) {
-  span->prev->next = span->next;
-  span->next->prev = span->prev;
-  span->prev = NULL;
-  span->next = NULL;
-}
-
-int DLL_Length(const Span* list) {
-  int result = 0;
-  for (Span* s = list->next; s != list; s = s->next) {
-    result++;
-  }
-  return result;
-}
-
-void DLL_Prepend(Span* list, Span* span) {
-  ASSERT(span->next == NULL);
-  ASSERT(span->prev == NULL);
-  span->next = list->next;
-  span->prev = list;
-  list->next->prev = span;
-  list->next = span;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/span.h b/contrib/libtcmalloc/src/span.h
deleted file mode 100644
index 3fe30ba33d0..00000000000
--- a/contrib/libtcmalloc/src/span.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// A Span is a contiguous run of pages.
-
-#ifndef TCMALLOC_SPAN_H_
-#define TCMALLOC_SPAN_H_
-
-#include "config.h"
-#include "common.h"
-
-namespace tcmalloc {
-
-// Information kept for a span (a contiguous run of pages).
-struct Span {
-  PageID        start;          // Starting page number
-  Length        length;         // Number of pages in span
-  Span*         next;           // Used when in link list
-  Span*         prev;           // Used when in link list
-  void*         objects;        // Linked list of free objects
-  unsigned int  refcount : 16;  // Number of non-free objects
-  unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
-  unsigned int  location : 2;   // Is the span on a freelist, and if so, which?
-  unsigned int  sample : 1;     // Sampled object?
-
-#undef SPAN_HISTORY
-#ifdef SPAN_HISTORY
-  // For debugging, we can keep a log events per span
-  int nexthistory;
-  char history[64];
-  int value[64];
-#endif
-
-  // What freelist the span is on: IN_USE if on none, or normal or returned
-  enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
-};
-
-#ifdef SPAN_HISTORY
-void Event(Span* span, char op, int v = 0);
-#else
-#define Event(s,o,v) ((void) 0)
-#endif
-
-// Allocator/deallocator for spans
-Span* NewSpan(PageID p, Length len);
-void DeleteSpan(Span* span);
-
-// -------------------------------------------------------------------------
-// Doubly linked list of spans.
-// -------------------------------------------------------------------------
-
-// Initialize *list to an empty list.
-void DLL_Init(Span* list);
-
-// Remove 'span' from the linked list in which it resides, updating the
-// pointers of adjacent Spans and setting span's next and prev to NULL.
-void DLL_Remove(Span* span);
-
-// Return true iff "list" is empty.
-inline bool DLL_IsEmpty(const Span* list) {
-  return list->next == list;
-}
-
-// Add span to the front of list.
-void DLL_Prepend(Span* list, Span* span);
-
-// Return the length of the linked list. O(n)
-int DLL_Length(const Span* list);
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_SPAN_H_
diff --git a/contrib/libtcmalloc/src/stack_trace_table.cc b/contrib/libtcmalloc/src/stack_trace_table.cc
deleted file mode 100644
index 049cca524b5..00000000000
--- a/contrib/libtcmalloc/src/stack_trace_table.cc
+++ /dev/null
@@ -1,160 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Andrew Fikes
-
-#include "config.h"
-#include "stack_trace_table.h"
-#include <string.h>                     // for NULL, memset
-#include "base/spinlock.h"              // for SpinLockHolder
-#include "common.h"            // for StackTrace
-#include "internal_logging.h"  // for ASSERT, Log
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "static_vars.h"       // for Static
-
-namespace tcmalloc {
-
-bool StackTraceTable::Bucket::KeyEqual(uintptr_t h,
-                                       const StackTrace& t) const {
-  const bool eq = (this->hash == h && this->trace.depth == t.depth);
-  for (int i = 0; eq && i < t.depth; ++i) {
-    if (this->trace.stack[i] != t.stack[i]) {
-      return false;
-    }
-  }
-  return eq;
-}
-
-StackTraceTable::StackTraceTable()
-    : error_(false),
-      depth_total_(0),
-      bucket_total_(0),
-      table_(new Bucket*[kHashTableSize]()) {
-  memset(table_, 0, kHashTableSize * sizeof(Bucket*));
-}
-
-StackTraceTable::~StackTraceTable() {
-  delete[] table_;
-}
-
-void StackTraceTable::AddTrace(const StackTrace& t) {
-  if (error_) {
-    return;
-  }
-
-  // Hash function borrowed from base/heap-profile-table.cc
-  uintptr_t h = 0;
-  for (int i = 0; i < t.depth; ++i) {
-    h += reinterpret_cast<uintptr_t>(t.stack[i]);
-    h += h << 10;
-    h ^= h >> 6;
-  }
-  h += h << 3;
-  h ^= h >> 11;
-
-  const int idx = h % kHashTableSize;
-
-  Bucket* b = table_[idx];
-  while (b != NULL && !b->KeyEqual(h, t)) {
-    b = b->next;
-  }
-  if (b != NULL) {
-    b->count++;
-    b->trace.size += t.size;  // keep cumulative size
-  } else {
-    depth_total_ += t.depth;
-    bucket_total_++;
-    b = Static::bucket_allocator()->New();
-    if (b == NULL) {
-      Log(kLog, __FILE__, __LINE__,
-          "tcmalloc: could not allocate bucket", sizeof(*b));
-      error_ = true;
-    } else {
-      b->hash = h;
-      b->trace = t;
-      b->count = 1;
-      b->next = table_[idx];
-      table_[idx] = b;
-    }
-  }
-}
-
-void** StackTraceTable::ReadStackTracesAndClear() {
-  if (error_) {
-    return NULL;
-  }
-
-  // Allocate output array
-  const int out_len = bucket_total_ * 3 + depth_total_ + 1;
-  void** out = new void*[out_len];
-  if (out == NULL) {
-    Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: allocation failed for stack traces",
-        out_len * sizeof(*out));
-    return NULL;
-  }
-
-  // Fill output array
-  int idx = 0;
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count));
-      out[idx++] = reinterpret_cast<void*>(b->trace.size);  // cumulative size
-      out[idx++] = reinterpret_cast<void*>(b->trace.depth);
-      for (int d = 0; d < b->trace.depth; ++d) {
-        out[idx++] = b->trace.stack[d];
-      }
-      b = b->next;
-    }
-  }
-  out[idx++] = NULL;
-  ASSERT(idx == out_len);
-
-  // Clear state
-  error_ = false;
-  depth_total_ = 0;
-  bucket_total_ = 0;
-  SpinLockHolder h(Static::pageheap_lock());
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      Bucket* next = b->next;
-      Static::bucket_allocator()->Delete(b);
-      b = next;
-    }
-    table_[i] = NULL;
-  }
-
-  return out;
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/stack_trace_table.h b/contrib/libtcmalloc/src/stack_trace_table.h
deleted file mode 100644
index 66ed5d92822..00000000000
--- a/contrib/libtcmalloc/src/stack_trace_table.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Andrew Fikes
-//
-// Utility class for coalescing sampled stack traces.  Not thread-safe.
-
-#ifndef TCMALLOC_STACK_TRACE_TABLE_H_
-#define TCMALLOC_STACK_TRACE_TABLE_H_
-
-#include "config.h"
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t
-#endif
-#include "common.h"
-
-namespace tcmalloc {
-
-class PERFTOOLS_DLL_DECL StackTraceTable {
- public:
-  // REQUIRES: L < pageheap_lock
-  StackTraceTable();
-  ~StackTraceTable();
-
-  // Adds stack trace "t" to table.
-  //
-  // REQUIRES: L >= pageheap_lock
-  void AddTrace(const StackTrace& t);
-
-  // Returns stack traces formatted per MallocExtension guidelines.
-  // May return NULL on error.  Clears state before returning.
-  //
-  // REQUIRES: L < pageheap_lock
-  void** ReadStackTracesAndClear();
-
-  // Exposed for PageHeapAllocator
-  struct Bucket {
-    // Key
-    uintptr_t hash;
-    StackTrace trace;
-
-    // Payload
-    int count;
-    Bucket* next;
-
-    bool KeyEqual(uintptr_t h, const StackTrace& t) const;
-  };
-
-  // For testing
-  int depth_total() const { return depth_total_; }
-  int bucket_total() const { return bucket_total_; }
-
- private:
-  static const int kHashTableSize = 1 << 14; // => table_ is 128k
-
-  bool error_;
-  int depth_total_;
-  int bucket_total_;
-  Bucket** table_;
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_STACK_TRACE_TABLE_H_
diff --git a/contrib/libtcmalloc/src/stacktrace.cc b/contrib/libtcmalloc/src/stacktrace.cc
deleted file mode 100644
index 88c8b15946d..00000000000
--- a/contrib/libtcmalloc/src/stacktrace.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Produce stack trace.
-//
-// There are three different ways we can try to get the stack trace:
-//
-// 1) Our hand-coded stack-unwinder.  This depends on a certain stack
-//    layout, which is used by gcc (and those systems using a
-//    gcc-compatible ABI) on x86 systems, at least since gcc 2.95.
-//    It uses the frame pointer to do its work.
-//
-// 2) The libunwind library.  This is still in development, and as a
-//    separate library adds a new dependency, abut doesn't need a frame
-//    pointer.  It also doesn't call malloc.
-//
-// 3) The gdb unwinder -- also the one used by the c++ exception code.
-//    It's obviously well-tested, but has a fatal flaw: it can call
-//    malloc() from the unwinder.  This is a problem because we're
-//    trying to use the unwinder to instrument malloc().
-//
-// Note: if you add a new implementation here, make sure it works
-// correctly when GetStackTrace() is called with max_depth == 0.
-// Some code may do that.
-
-#include "config.h"
-#include <stdlib.h> // for getenv
-#include <string.h> // for strcmp
-#include <stdio.h> // for fprintf
-#include "gperftools/stacktrace.h"
-#include "base/commandlineflags.h"
-#include "base/googleinit.h"
-
-
-// we're using plain struct and not class to avoid any possible issues
-// during initialization. Struct of pointers is easy to init at
-// link-time.
-struct GetStackImplementation {
-  int (*GetStackFramesPtr)(void** result, int* sizes, int max_depth,
-                           int skip_count);
-
-  int (*GetStackFramesWithContextPtr)(void** result, int* sizes, int max_depth,
-                                      int skip_count, const void *uc);
-
-  int (*GetStackTracePtr)(void** result, int max_depth,
-                          int skip_count);
-
-  int (*GetStackTraceWithContextPtr)(void** result, int max_depth,
-                                  int skip_count, const void *uc);
-
-  const char *name;
-};
-
-#if HAVE_DECL_BACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h"
-#define GST_SUFFIX generic
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_generic
-#endif
-
-#ifdef HAVE_UNWIND_BACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_libgcc-inl.h"
-#define GST_SUFFIX libgcc
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_libgcc
-#endif
-
-// libunwind uses __thread so we check for both libunwind.h and
-// __thread support
-#if defined(HAVE_LIBUNWIND_H) && defined(HAVE_TLS)
-#define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h"
-#define GST_SUFFIX libunwind
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_libunwind
-#endif // HAVE_LIBUNWIND_H
-
-#if defined(__i386__) || defined(__x86_64__)
-#define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
-#define GST_SUFFIX x86
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_x86
-#endif // i386 || x86_64
-
-#if defined(__ppc__) || defined(__PPC__)
-#if defined(__linux__)
-#define STACKTRACE_INL_HEADER "stacktrace_powerpc-linux-inl.h"
-#else
-#define STACKTRACE_INL_HEADER "stacktrace_powerpc-darwin-inl.h"
-#endif
-#define GST_SUFFIX ppc
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_ppc
-#endif
-
-#if defined(__arm__)
-#define STACKTRACE_INL_HEADER "stacktrace_arm-inl.h"
-#define GST_SUFFIX arm
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_arm
-#endif
-
-#ifdef TCMALLOC_ENABLE_INSTRUMENT_STACKTRACE
-#define STACKTRACE_INL_HEADER "stacktrace_instrument-inl.h"
-#define GST_SUFFIX instrument
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_instrument
-#endif
-
-// The Windows case -- probably cygwin and mingw will use one of the
-// x86-includes above, but if not, we can fall back to windows intrinsics.
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
-#define STACKTRACE_INL_HEADER "stacktrace_win32-inl.h"
-#define GST_SUFFIX win32
-#include "stacktrace_impl_setup-inl.h"
-#undef GST_SUFFIX
-#undef STACKTRACE_INL_HEADER
-#define HAVE_GST_win32
-#endif
-
-static GetStackImplementation *all_impls[] = {
-#ifdef HAVE_GST_libgcc
-  &impl__libgcc,
-#endif
-#ifdef HAVE_GST_generic
-  &impl__generic,
-#endif
-#ifdef HAVE_GST_libunwind
-  &impl__libunwind,
-#endif
-#ifdef HAVE_GST_x86
-  &impl__x86,
-#endif
-#ifdef HAVE_GST_arm
-  &impl__arm,
-#endif
-#ifdef HAVE_GST_ppc
-  &impl__ppc,
-#endif
-#ifdef HAVE_GST_instrument
-  &impl__instrument,
-#endif
-#ifdef HAVE_GST_win32
-  &impl__win32,
-#endif
-  NULL
-};
-
-// ppc and i386 implementations prefer arch-specific asm implementations.
-// arm's asm implementation is broken
-#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__PPC__)
-#if !defined(NO_FRAME_POINTER)
-#define TCMALLOC_DONT_PREFER_LIBUNWIND
-#endif
-#endif
-
-static bool get_stack_impl_inited;
-
-#if defined(HAVE_GST_instrument)
-static GetStackImplementation *get_stack_impl = &impl__instrument;
-#elif defined(HAVE_GST_win32)
-static GetStackImplementation *get_stack_impl = &impl__win32;
-#elif defined(HAVE_GST_x86) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND)
-static GetStackImplementation *get_stack_impl = &impl__x86;
-#elif defined(HAVE_GST_ppc) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND)
-static GetStackImplementation *get_stack_impl = &impl__ppc;
-#elif defined(HAVE_GST_libunwind)
-static GetStackImplementation *get_stack_impl = &impl__libunwind;
-#elif defined(HAVE_GST_libgcc)
-static GetStackImplementation *get_stack_impl = &impl__libgcc;
-#elif defined(HAVE_GST_generic)
-static GetStackImplementation *get_stack_impl = &impl__generic;
-#elif defined(HAVE_GST_arm)
-static GetStackImplementation *get_stack_impl = &impl__arm;
-#elif 0
-// This is for the benefit of code analysis tools that may have
-// trouble with the computed #include above.
-# include "stacktrace_x86-inl.h"
-# include "stacktrace_libunwind-inl.h"
-# include "stacktrace_generic-inl.h"
-# include "stacktrace_powerpc-inl.h"
-# include "stacktrace_win32-inl.h"
-# include "stacktrace_arm-inl.h"
-# include "stacktrace_instrument-inl.h"
-#else
-#error Cannot calculate stack trace: will need to write for your environment
-#endif
-
-static int ATTRIBUTE_NOINLINE frame_forcer(int rv) {
-  return rv;
-}
-
-static void init_default_stack_impl_inner(void);
-
-namespace tcmalloc {
-  bool EnterStacktraceScope(void);
-  void LeaveStacktraceScope(void);
-}
-
-namespace {
-  using tcmalloc::EnterStacktraceScope;
-  using tcmalloc::LeaveStacktraceScope;
-
-  class StacktraceScope {
-    bool stacktrace_allowed;
-  public:
-    StacktraceScope() {
-      stacktrace_allowed = true;
-      stacktrace_allowed = EnterStacktraceScope();
-    }
-    bool IsStacktraceAllowed() {
-      return stacktrace_allowed;
-    }
-    ~StacktraceScope() {
-      if (stacktrace_allowed) {
-        LeaveStacktraceScope();
-      }
-    }
-  };
-}
-
-PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
-                                      int skip_count) {
-  StacktraceScope scope;
-  if (!scope.IsStacktraceAllowed()) {
-    return 0;
-  }
-  init_default_stack_impl_inner();
-  return frame_forcer(get_stack_impl->GetStackFramesPtr(result, sizes, max_depth, skip_count));
-}
-
-PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
-                                                 int skip_count, const void *uc) {
-  StacktraceScope scope;
-  if (!scope.IsStacktraceAllowed()) {
-    return 0;
-  }
-  init_default_stack_impl_inner();
-  return frame_forcer(get_stack_impl->GetStackFramesWithContextPtr(
-                        result, sizes, max_depth,
-                        skip_count, uc));
-}
-
-PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
-                                     int skip_count) {
-  StacktraceScope scope;
-  if (!scope.IsStacktraceAllowed()) {
-    return 0;
-  }
-  init_default_stack_impl_inner();
-  return frame_forcer(get_stack_impl->GetStackTracePtr(result, max_depth, skip_count));
-}
-
-PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
-                                                int skip_count, const void *uc) {
-  StacktraceScope scope;
-  if (!scope.IsStacktraceAllowed()) {
-    return 0;
-  }
-  init_default_stack_impl_inner();
-  return frame_forcer(get_stack_impl->GetStackTraceWithContextPtr(
-                        result, max_depth, skip_count, uc));
-}
-
-static void init_default_stack_impl_inner(void) {
-  if (get_stack_impl_inited) {
-    return;
-  }
-  get_stack_impl_inited = true;
-  char *val = getenv("TCMALLOC_STACKTRACE_METHOD");
-  if (!val || !*val) {
-    return;
-  }
-  for (GetStackImplementation **p = all_impls; *p; p++) {
-    GetStackImplementation *c = *p;
-    if (strcmp(c->name, val) == 0) {
-      get_stack_impl = c;
-      return;
-    }
-  }
-  fprintf(stderr, "Unknown or unsupported stacktrace method requested: %s. Ignoring it\n", val);
-}
-
-static void init_default_stack_impl(void) {
-  init_default_stack_impl_inner();
-  if (EnvToBool("TCMALLOC_STACKTRACE_METHOD_VERBOSE", false)) {
-    fprintf(stderr, "Chosen stacktrace method is %s\nSupported methods:\n", get_stack_impl->name);
-    for (GetStackImplementation **p = all_impls; *p; p++) {
-      GetStackImplementation *c = *p;
-      fprintf(stderr, "* %s\n", c->name);
-    }
-    fputs("\n", stderr);
-  }
-}
-
-REGISTER_MODULE_INITIALIZER(stacktrace_init_default_stack_impl, init_default_stack_impl());
diff --git a/contrib/libtcmalloc/src/stacktrace_arm-inl.h b/contrib/libtcmalloc/src/stacktrace_arm-inl.h
deleted file mode 100644
index 1586b8fec62..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_arm-inl.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2011, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Doug Kwan
-// This is inspired by Craig Silverstein's PowerPC stacktrace code.
-//
-
-#ifndef BASE_STACKTRACE_ARM_INL_H_
-#define BASE_STACKTRACE_ARM_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include <stdint.h>   // for uintptr_t
-#include "base/basictypes.h"  // for NULL
-#include <gperftools/stacktrace.h>
-
-// WARNING:
-// This only works if all your code is in either ARM or THUMB mode.  With
-// interworking, the frame pointer of the caller can either be in r11 (ARM
-// mode) or r7 (THUMB mode).  A callee only saves the frame pointer of its
-// mode in a fixed location on its stack frame.  If the caller is a different
-// mode, there is no easy way to find the frame pointer.  It can either be
-// still in the designated register or saved on stack along with other callee
-// saved registers.
-
-// Given a pointer to a stack frame, locate and return the calling
-// stackframe, or return NULL if no stackframe can be found. Perform sanity
-// checks (the strictness of which is controlled by the boolean parameter
-// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
-template<bool STRICT_UNWINDING>
-static void **NextStackFrame(void **old_sp) {
-  void **new_sp = (void**) old_sp[-1];
-
-  // Check that the transition from frame pointer old_sp to frame
-  // pointer new_sp isn't clearly bogus
-  if (STRICT_UNWINDING) {
-    // With the stack growing downwards, older stack frame must be
-    // at a greater address that the current one.
-    if (new_sp <= old_sp) return NULL;
-    // Assume stack frames larger than 100,000 bytes are bogus.
-    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
-  } else {
-    // In the non-strict mode, allow discontiguous stack frames.
-    // (alternate-signal-stacks for example).
-    if (new_sp == old_sp) return NULL;
-    // And allow frames upto about 1MB.
-    if ((new_sp > old_sp)
-        && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
-  }
-  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
-  return new_sp;
-}
-
-// This ensures that GetStackTrace stes up the Link Register properly.
-#ifdef __GNUC__
-void StacktraceArmDummyFunction() __attribute__((noinline));
-void StacktraceArmDummyFunction() { __asm__ volatile(""); }
-#else
-# error StacktraceArmDummyFunction() needs to be ported to this platform.
-#endif
-#endif  // BASE_STACKTRACE_ARM_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-#ifdef __GNUC__
-  void **sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#else
-# error reading stack point not yet supported on this platform.
-#endif
-
-  // On ARM, the return address is stored in the link register (r14).
-  // This is not saved on the stack frame of a leaf function.  To
-  // simplify code that reads return addresses, we call a dummy
-  // function so that the return address of this function is also
-  // stored in the stack frame.  This works at least for gcc.
-  StacktraceArmDummyFunction();
-
-  skip_count++; // skip parent frame due to indirection in stacktrace.cc
-
-  int n = 0;
-  while (sp && n < max_depth) {
-    // The GetStackFrames routine is called when we are in some
-    // informational context (the failure signal handler for example).
-    // Use the non-strict unwinding rules to produce a stack trace
-    // that is as complete as possible (even if it contains a few bogus
-    // entries in some rare cases).
-    void **next_sp = NextStackFrame<IS_STACK_FRAMES == 0>(sp);
-
-    if (skip_count > 0) {
-      skip_count--;
-    } else {
-      result[n] = *sp;
-
-#if IS_STACK_FRAMES
-      if (next_sp > sp) {
-        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
-      } else {
-        // A frame-size of 0 is used to indicate unknown frame size.
-        sizes[n] = 0;
-      }
-#endif
-      n++;
-    }
-    sp = next_sp;
-  }
-  return n;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_generic-inl.h b/contrib/libtcmalloc/src/stacktrace_generic-inl.h
deleted file mode 100644
index 7d7c22d9e45..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_generic-inl.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Portable implementation - just use glibc
-//
-// Note:  The glibc implementation may cause a call to malloc.
-// This can cause a deadlock in HeapProfiler.
-
-#ifndef BASE_STACKTRACE_GENERIC_INL_H_
-#define BASE_STACKTRACE_GENERIC_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include <execinfo.h>
-#include <string.h>
-#include "gperftools/stacktrace.h"
-#endif  // BASE_STACKTRACE_GENERIC_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  static const int kStackLength = 64;
-  void * stack[kStackLength];
-  int size;
-
-  size = backtrace(stack, kStackLength);
-  skip_count += 2;  // we want to skip the current and it's parent frame as well
-  int result_count = size - skip_count;
-  if (result_count < 0)
-    result_count = 0;
-  if (result_count > max_depth)
-    result_count = max_depth;
-  for (int i = 0; i < result_count; i++)
-    result[i] = stack[i + skip_count];
-
-#if IS_STACK_FRAMES
-  // No implementation for finding out the stack frame sizes yet.
-  memset(sizes, 0, sizeof(*sizes) * result_count);
-#endif
-
-  return result_count;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h b/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h
deleted file mode 100644
index 698c5b38196..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_impl_setup-inl.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// NOTE: this is NOT to be #include-d normally. It's internal
-// implementation detail of stacktrace.cc
-//
-
-// Copyright (c) 2014, gperftools Contributors.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Aliaksey Kandratsenka <alk@tut.by>
-//
-//  based on stacktrace.cc and stacktrace_config.h by Sanjay Ghemawat
-//  and Paul Pluzhnikov from Google Inc
-
-#define SIS_CONCAT2(a, b) a##b
-#define SIS_CONCAT(a, b) SIS_CONCAT2(a,b)
-
-#define SIS_STRINGIFY(a) SIS_STRINGIFY2(a)
-#define SIS_STRINGIFY2(a) #a
-
-#define IS_STACK_FRAMES 0
-#define IS_WITH_CONTEXT 0
-#define GET_STACK_TRACE_OR_FRAMES \
-  SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count)
-#include STACKTRACE_INL_HEADER
-#undef IS_STACK_FRAMES
-#undef IS_WITH_CONTEXT
-#undef GET_STACK_TRACE_OR_FRAMES
-
-#define IS_STACK_FRAMES 1
-#define IS_WITH_CONTEXT 0
-#define GET_STACK_TRACE_OR_FRAMES \
-  SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count)
-#include STACKTRACE_INL_HEADER
-#undef IS_STACK_FRAMES
-#undef IS_WITH_CONTEXT
-#undef GET_STACK_TRACE_OR_FRAMES
-
-#define IS_STACK_FRAMES 0
-#define IS_WITH_CONTEXT 1
-#define GET_STACK_TRACE_OR_FRAMES \
-  SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \
-                                                   int skip_count, const void *ucp)
-#include STACKTRACE_INL_HEADER
-#undef IS_STACK_FRAMES
-#undef IS_WITH_CONTEXT
-#undef GET_STACK_TRACE_OR_FRAMES
-
-#define IS_STACK_FRAMES 1
-#define IS_WITH_CONTEXT 1
-#define GET_STACK_TRACE_OR_FRAMES \
-  SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \
-                                                    int skip_count, const void *ucp)
-#include STACKTRACE_INL_HEADER
-#undef IS_STACK_FRAMES
-#undef IS_WITH_CONTEXT
-#undef GET_STACK_TRACE_OR_FRAMES
-
-static GetStackImplementation SIS_CONCAT(impl__,GST_SUFFIX) = {
-  SIS_CONCAT(GetStackFrames_, GST_SUFFIX),
-  SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX),
-  SIS_CONCAT(GetStackTrace_, GST_SUFFIX),
-  SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX),
-  SIS_STRINGIFY(GST_SUFFIX)
-};
-
-#undef SIS_CONCAT2
-#undef SIS_CONCAT
diff --git a/contrib/libtcmalloc/src/stacktrace_instrument-inl.h b/contrib/libtcmalloc/src/stacktrace_instrument-inl.h
deleted file mode 100644
index c631765c8a2..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_instrument-inl.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2013, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Jean Lee <xiaoyur347@gmail.com>
-// based on gcc Code-Gen-Options "-finstrument-functions" listed in
-// http://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html .
-// Should run configure with CXXFLAGS="-finstrument-functions".
-
-// This file is a backtrace implementation for systems :
-// * The glibc implementation of backtrace() may cause a call to malloc,
-//   and cause a deadlock in HeapProfiler.
-// * The libunwind implementation prints no backtrace.
-
-// The backtrace arrays are stored in "thread_back_trace" variable.
-// Maybe to use thread local storage is better and should save memorys.
-
-#ifndef BASE_STACKTRACE_INSTRUMENT_INL_H_
-#define BASE_STACKTRACE_INSTRUMENT_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include <execinfo.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include "gperftools/stacktrace.h"
-
-#define gettid() syscall(__NR_gettid)
-#ifndef __x86_64__
-#define MAX_THREAD (32768)
-#else
-#define MAX_THREAD (65536)
-#endif
-#define MAX_DEPTH  (30)
-#define ATTRIBUTE_NOINSTRUMENT __attribute__ ((no_instrument_function))
-
-typedef struct {
-  int   stack_depth;
-  void* frame[MAX_DEPTH];
-}BACK_TRACE;
-
-static BACK_TRACE thread_back_trace[MAX_THREAD];
-extern "C" {
-void __cyg_profile_func_enter(void *func_address,
-                              void *call_site) ATTRIBUTE_NOINSTRUMENT;
-void __cyg_profile_func_enter(void *func_address, void *call_site) {
-  (void)func_address;
-
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  backtrace->stack_depth = stack_depth + 1;
-  if ( stack_depth >= MAX_DEPTH ) {
-    return;
-  }
-  backtrace->frame[stack_depth] = call_site;
-}
-
-void __cyg_profile_func_exit(void *func_address,
-                             void *call_site) ATTRIBUTE_NOINSTRUMENT;
-void __cyg_profile_func_exit(void *func_address, void *call_site) {
-  (void)func_address;
-  (void)call_site;
-
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  backtrace->stack_depth = stack_depth - 1;
-  if ( stack_depth >= MAX_DEPTH ) {
-    return;
-  }
-  backtrace->frame[stack_depth] = 0;
-}
-}  // extern "C"
-
-static int cyg_backtrace(void **buffer, int size) {
-  BACK_TRACE* backtrace = thread_back_trace + gettid();
-  int stack_depth = backtrace->stack_depth;
-  if ( stack_depth >= MAX_DEPTH ) {
-    stack_depth = MAX_DEPTH;
-  }
-  int nSize = (size > stack_depth) ? stack_depth : size;
-  for (int i = 0; i < nSize; i++) {
-  buffer[i] = backtrace->frame[nSize - i - 1];
-  }
-
-  return nSize;
-}
-
-#endif  // BASE_STACKTRACE_INSTRUMENT_INL_H_
-
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  static const int kStackLength = 64;
-  void * stack[kStackLength];
-  int size;
-  memset(stack, 0, sizeof(stack));
-
-  size = cyg_backtrace(stack, kStackLength);
-  skip_count += 2;  // we want to skip the current and parent frame as well
-  int result_count = size - skip_count;
-  if (result_count < 0)
-    result_count = 0;
-  if (result_count > max_depth)
-    result_count = max_depth;
-  for (int i = 0; i < result_count; i++)
-    result[i] = stack[i + skip_count];
-
-#if IS_STACK_FRAMES
-  // No implementation for finding out the stack frame sizes yet.
-  memset(sizes, 0, sizeof(*sizes) * result_count);
-#endif
-
-  return result_count;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h b/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h
deleted file mode 100644
index ce9cf5196ad..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_libgcc-inl.h
+++ /dev/null
@@ -1,111 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2016, gperftools Contributors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This file implements backtrace capturing via libgcc's
-// _Unwind_Backtrace. This generally works almost always. It will fail
-// sometimes when we're trying to capture backtrace from signal
-// handler (i.e. in cpu profiler) while some C++ code is throwing
-// exception.
-
-#ifndef BASE_STACKTRACE_LIBGCC_INL_H_
-#define BASE_STACKTRACE_LIBGCC_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-extern "C" {
-#include <assert.h>
-#include <string.h>   // for memset()
-}
-
-#include <unwind.h>
-
-#include "gperftools/stacktrace.h"
-
-struct libgcc_backtrace_data {
-  void **array;
-  int skip;
-  int pos;
-  int limit;
-};
-
-static _Unwind_Reason_Code libgcc_backtrace_helper(struct _Unwind_Context *ctx,
-                                                   void *_data) {
-  libgcc_backtrace_data *data =
-    reinterpret_cast<libgcc_backtrace_data *>(_data);
-
-  if (data->skip > 0) {
-    data->skip--;
-    return _URC_NO_REASON;
-  }
-
-  if (data->pos < data->limit) {
-    void *ip = reinterpret_cast<void *>(_Unwind_GetIP(ctx));;
-    data->array[data->pos++] = ip;
-  }
-
-  return _URC_NO_REASON;
-}
-
-#endif  // BASE_STACKTRACE_LIBGCC_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  libgcc_backtrace_data data;
-  data.array = result;
-  // we're also skipping current and parent's frame
-  data.skip = skip_count + 2;
-  data.pos = 0;
-  data.limit = max_depth;
-
-  _Unwind_Backtrace(libgcc_backtrace_helper, &data);
-
-  if (data.pos > 1 && data.array[data.pos - 1] == NULL)
-    --data.pos;
-
-#if IS_STACK_FRAMES
-  // No implementation for finding out the stack frame sizes.
-  memset(sizes, 0, sizeof(*sizes) * data.pos);
-#endif
-
-  return data.pos;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h b/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h
deleted file mode 100644
index e8257af6c2c..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_libunwind-inl.h
+++ /dev/null
@@ -1,152 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Arun Sharma
-//
-// Produce stack trace using libunwind
-
-#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_
-#define BASE_STACKTRACE_LIBINWIND_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-// We only need local unwinder.
-#define UNW_LOCAL_ONLY
-
-extern "C" {
-#include <assert.h>
-#include <string.h>   // for memset()
-#include <libunwind.h>
-}
-#include "gperftools/stacktrace.h"
-
-#include "base/basictypes.h"
-#include "base/logging.h"
-
-// Sometimes, we can try to get a stack trace from within a stack
-// trace, because libunwind can call mmap (maybe indirectly via an
-// internal mmap based memory allocator), and that mmap gets trapped
-// and causes a stack-trace request.  If were to try to honor that
-// recursive request, we'd end up with infinite recursion or deadlock.
-// Luckily, it's safe to ignore those subsequent traces.  In such
-// cases, we return 0 to indicate the situation.
-static __thread int recursive ATTR_INITIAL_EXEC;
-
-#if defined(TCMALLOC_ENABLE_UNWIND_FROM_UCONTEXT) && (defined(__i386__) || defined(__x86_64__)) && defined(__GNU_LIBRARY__)
-#define BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT 1
-#endif
-
-#endif  // BASE_STACKTRACE_LIBINWIND_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-static int GET_STACK_TRACE_OR_FRAMES {
-  void *ip;
-  int n = 0;
-  unw_cursor_t cursor;
-  unw_context_t uc;
-#if IS_STACK_FRAMES
-  unw_word_t sp = 0, next_sp = 0;
-#endif
-
-  if (recursive) {
-    return 0;
-  }
-  ++recursive;
-
-#if (IS_WITH_CONTEXT && defined(BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT))
-  if (ucp) {
-    uc = *(static_cast<unw_context_t *>(const_cast<void *>(ucp)));
-    /* this is a bit weird. profiler.cc calls us with signal's ucontext
-     * yet passing us 2 as skip_count and essentially assuming we won't
-     * use ucontext. */
-    /* In order to fix that I'm going to assume that if ucp is
-     * non-null we're asked to ignore skip_count in case we're
-     * able to use ucp */
-    skip_count = 0;
-  } else {
-    unw_getcontext(&uc);
-    skip_count += 2;         // Do not include current and parent frame
-  }
-#else
-  unw_getcontext(&uc);
-  skip_count += 2;         // Do not include current and parent frame
-#endif
-
-  /*int ret =*/ unw_init_local(&cursor, &uc);
-  //assert(ret >= 0);
-
-  while (skip_count--) {
-    if (unw_step(&cursor) <= 0) {
-      goto out;
-    }
-#if IS_STACK_FRAMES
-    if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
-      goto out;
-    }
-#endif
-  }
-
-  while (n < max_depth) {
-    if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
-      break;
-    }
-#if IS_STACK_FRAMES
-    sizes[n] = 0;
-#endif
-    result[n++] = ip;
-    if (unw_step(&cursor) <= 0) {
-      break;
-    }
-#if IS_STACK_FRAMES
-    sp = next_sp;
-    if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) {
-      break;
-    }
-    sizes[n - 1] = next_sp - sp;
-#endif
-  }
-out:
-  --recursive;
-  return n;
-}
diff --git a/contrib/libtcmalloc/src/stacktrace_x86-inl.h b/contrib/libtcmalloc/src/stacktrace_x86-inl.h
deleted file mode 100644
index 46eb5d82d71..00000000000
--- a/contrib/libtcmalloc/src/stacktrace_x86-inl.h
+++ /dev/null
@@ -1,354 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Produce stack trace
-
-#ifndef BASE_STACKTRACE_X86_INL_H_
-#define BASE_STACKTRACE_X86_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
-#include "config.h"
-#include <stdlib.h>   // for NULL
-#include <assert.h>
-#if defined(HAVE_SYS_UCONTEXT_H)
-#include <sys/ucontext.h>
-#elif defined(HAVE_UCONTEXT_H)
-#include <ucontext.h>  // for ucontext_t
-#elif defined(HAVE_CYGWIN_SIGNAL_H)
-// cygwin/signal.h has a buglet where it uses pthread_attr_t without
-// #including <pthread.h> itself.  So we have to do it.
-# ifdef HAVE_PTHREAD
-# include <pthread.h>
-# endif
-#include <cygwin/signal.h>
-typedef ucontext ucontext_t;
-#endif
-#ifdef HAVE_STDINT_H
-#include <stdint.h>   // for uintptr_t
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h> // for msync
-#include "base/vdso_support.h"
-#endif
-
-#include "gperftools/stacktrace.h"
-
-#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP)
-// Count "push %reg" instructions in VDSO __kernel_vsyscall(),
-// preceding "syscall" or "sysenter".
-// If __kernel_vsyscall uses frame pointer, answer 0.
-//
-// kMaxBytes tells how many instruction bytes of __kernel_vsyscall
-// to analyze before giving up. Up to kMaxBytes+1 bytes of
-// instructions could be accessed.
-//
-// Here are known __kernel_vsyscall instruction sequences:
-//
-// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S).
-// Used on Intel.
-//  0xffffe400 <__kernel_vsyscall+0>:       push   %ecx
-//  0xffffe401 <__kernel_vsyscall+1>:       push   %edx
-//  0xffffe402 <__kernel_vsyscall+2>:       push   %ebp
-//  0xffffe403 <__kernel_vsyscall+3>:       mov    %esp,%ebp
-//  0xffffe405 <__kernel_vsyscall+5>:       sysenter
-//
-// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S).
-// Used on AMD.
-//  0xffffe400 <__kernel_vsyscall+0>:       push   %ebp
-//  0xffffe401 <__kernel_vsyscall+1>:       mov    %ecx,%ebp
-//  0xffffe403 <__kernel_vsyscall+3>:       syscall
-//
-// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S)
-//  0xffffe400 <__kernel_vsyscall+0>:       int $0x80
-//  0xffffe401 <__kernel_vsyscall+1>:       ret
-//
-static const int kMaxBytes = 10;
-
-// We use assert()s instead of DCHECK()s -- this is too low level
-// for DCHECK().
-
-static int CountPushInstructions(const unsigned char *const addr) {
-  int result = 0;
-  for (int i = 0; i < kMaxBytes; ++i) {
-    if (addr[i] == 0x89) {
-      // "mov reg,reg"
-      if (addr[i + 1] == 0xE5) {
-        // Found "mov %esp,%ebp".
-        return 0;
-      }
-      ++i;  // Skip register encoding byte.
-    } else if (addr[i] == 0x0F &&
-               (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) {
-      // Found "sysenter" or "syscall".
-      return result;
-    } else if ((addr[i] & 0xF0) == 0x50) {
-      // Found "push %reg".
-      ++result;
-    } else if (addr[i] == 0xCD && addr[i + 1] == 0x80) {
-      // Found "int $0x80"
-      assert(result == 0);
-      return 0;
-    } else {
-      // Unexpected instruction.
-      assert(0 == "unexpected instruction in __kernel_vsyscall");
-      return 0;
-    }
-  }
-  // Unexpected: didn't find SYSENTER or SYSCALL in
-  // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval.
-  assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall");
-  return 0;
-}
-#endif
-
-// Given a pointer to a stack frame, locate and return the calling
-// stackframe, or return NULL if no stackframe can be found. Perform sanity
-// checks (the strictness of which is controlled by the boolean parameter
-// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
-template<bool STRICT_UNWINDING, bool WITH_CONTEXT>
-static void **NextStackFrame(void **old_sp, const void *uc) {
-  void **new_sp = (void **) *old_sp;
-
-#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT)
-  if (WITH_CONTEXT && uc != NULL) {
-    // How many "push %reg" instructions are there at __kernel_vsyscall?
-    // This is constant for a given kernel and processor, so compute
-    // it only once.
-    static int num_push_instructions = -1;  // Sentinel: not computed yet.
-    // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly
-    // be there.
-    static const unsigned char *kernel_rt_sigreturn_address = NULL;
-    static const unsigned char *kernel_vsyscall_address = NULL;
-    if (num_push_instructions == -1) {
-      base::VDSOSupport vdso;
-      if (vdso.IsPresent()) {
-        base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info;
-        base::VDSOSupport::SymbolInfo vsyscall_symbol_info;
-        if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5",
-                               STT_FUNC, &rt_sigreturn_symbol_info) ||
-            !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5",
-                               STT_FUNC, &vsyscall_symbol_info) ||
-            rt_sigreturn_symbol_info.address == NULL ||
-            vsyscall_symbol_info.address == NULL) {
-          // Unexpected: 32-bit VDSO is present, yet one of the expected
-          // symbols is missing or NULL.
-          assert(0 == "VDSO is present, but doesn't have expected symbols");
-          num_push_instructions = 0;
-        } else {
-          kernel_rt_sigreturn_address =
-              reinterpret_cast<const unsigned char *>(
-                  rt_sigreturn_symbol_info.address);
-          kernel_vsyscall_address =
-              reinterpret_cast<const unsigned char *>(
-                  vsyscall_symbol_info.address);
-          num_push_instructions =
-              CountPushInstructions(kernel_vsyscall_address);
-        }
-      } else {
-        num_push_instructions = 0;
-      }
-    }
-    if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL &&
-        old_sp[1] == kernel_rt_sigreturn_address) {
-      const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
-      // This kernel does not use frame pointer in its VDSO code,
-      // and so %ebp is not suitable for unwinding.
-      void **const reg_ebp =
-          reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
-      const unsigned char *const reg_eip =
-          reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
-      if (new_sp == reg_ebp &&
-          kernel_vsyscall_address <= reg_eip &&
-          reg_eip - kernel_vsyscall_address < kMaxBytes) {
-        // We "stepped up" to __kernel_vsyscall, but %ebp is not usable.
-        // Restore from 'ucv' instead.
-        void **const reg_esp =
-            reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]);
-        // Check that alleged %esp is not NULL and is reasonably aligned.
-        if (reg_esp &&
-            ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
-          // Check that alleged %esp is actually readable. This is to prevent
-          // "double fault" in case we hit the first fault due to e.g. stack
-          // corruption.
-          //
-          // page_size is linker-initalized to avoid async-unsafe locking
-          // that GCC would otherwise insert (__cxa_guard_acquire etc).
-          static int page_size;
-          if (page_size == 0) {
-            // First time through.
-            page_size = getpagesize();
-          }
-          void *const reg_esp_aligned =
-              reinterpret_cast<void *>(
-                  (uintptr_t)(reg_esp + num_push_instructions - 1) &
-                  ~(page_size - 1));
-          if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) {
-            // Alleged %esp is readable, use it for further unwinding.
-            new_sp = reinterpret_cast<void **>(
-                reg_esp[num_push_instructions - 1]);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // Check that the transition from frame pointer old_sp to frame
-  // pointer new_sp isn't clearly bogus
-  if (STRICT_UNWINDING) {
-    // With the stack growing downwards, older stack frame must be
-    // at a greater address that the current one.
-    if (new_sp <= old_sp) return NULL;
-    // Assume stack frames larger than 100,000 bytes are bogus.
-    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
-  } else {
-    // In the non-strict mode, allow discontiguous stack frames.
-    // (alternate-signal-stacks for example).
-    if (new_sp == old_sp) return NULL;
-    if (new_sp > old_sp) {
-      // And allow frames upto about 1MB.
-      const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
-      const uintptr_t acceptable_delta = 1000000;
-      if (delta > acceptable_delta) {
-        return NULL;
-      }
-    }
-  }
-  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
-#ifdef __i386__
-  // On 64-bit machines, the stack pointer can be very close to
-  // 0xffffffff, so we explicitly check for a pointer into the
-  // last two pages in the address space
-  if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
-#endif
-#ifdef HAVE_MMAP
-  if (!STRICT_UNWINDING) {
-    // Lax sanity checks cause a crash on AMD-based machines with
-    // VDSO-enabled kernels.
-    // Make an extra sanity check to insure new_sp is readable.
-    // Note: NextStackFrame<false>() is only called while the program
-    //       is already on its last leg, so it's ok to be slow here.
-    static int page_size = getpagesize();
-    void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1));
-    if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1)
-      return NULL;
-  }
-#endif
-  return new_sp;
-}
-
-#endif  // BASE_STACKTRACE_X86_INL_H_
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
-//
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-
-static int GET_STACK_TRACE_OR_FRAMES {
-  void **sp;
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
-  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
-  // It's always correct on llvm, and the techniques below aren't (in
-  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
-  // so we also prefer __builtin_frame_address when running under llvm.
-  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#elif defined(__i386__)
-  // Stack frame format:
-  //    sp[0]   pointer to previous frame
-  //    sp[1]   caller address
-  //    sp[2]   first argument
-  //    ...
-  // NOTE: This will break under llvm, since result is a copy and not in sp[2]
-  sp = (void **)&result - 2;
-#elif defined(__x86_64__)
-  unsigned long rbp;
-  // Move the value of the register %rbp into the local variable rbp.
-  // We need 'volatile' to prevent this instruction from getting moved
-  // around during optimization to before function prologue is done.
-  // An alternative way to achieve this
-  // would be (before this __asm__ instruction) to call Noop() defined as
-  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
-  //   static void Noop() { asm(""); }  // prevent optimizing-away
-  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
-  // Arguments are passed in registers on x86-64, so we can't just
-  // offset from &result
-  sp = (void **) rbp;
-#else
-# error Using stacktrace_x86-inl.h on a non x86 architecture!
-#endif
-
-  skip_count++; // skip parent's frame due to indirection in stacktrace.cc
-
-  int n = 0;
-  while (sp && n < max_depth) {
-    if (*(sp+1) == reinterpret_cast<void *>(0)) {
-      // In 64-bit code, we often see a frame that
-      // points to itself and has a return address of 0.
-      break;
-    }
-#if !IS_WITH_CONTEXT
-    const void *const ucp = NULL;
-#endif
-    void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
-    if (skip_count > 0) {
-      skip_count--;
-    } else {
-      result[n] = *(sp+1);
-#if IS_STACK_FRAMES
-      if (next_sp > sp) {
-        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
-      } else {
-        // A frame-size of 0 is used to indicate unknown frame size.
-        sizes[n] = 0;
-      }
-#endif
-      n++;
-    }
-    sp = next_sp;
-  }
-  return n;
-}
diff --git a/contrib/libtcmalloc/src/static_vars.cc b/contrib/libtcmalloc/src/static_vars.cc
deleted file mode 100644
index 1e29d339996..00000000000
--- a/contrib/libtcmalloc/src/static_vars.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Ken Ashcraft <opensource@google.com>
-
-#include "config.h"
-#include "static_vars.h"
-#include <stddef.h>                     // for NULL
-#include <new>                          // for operator new
-#ifdef HAVE_PTHREAD
-#include <pthread.h>                    // for pthread_atfork
-#endif
-#include "internal_logging.h"  // for CHECK_CONDITION
-#include "common.h"
-#include "sampler.h"           // for Sampler
-#include "getenv_safe.h"       // TCMallocGetenvSafe
-#include "base/googleinit.h"
-#include "maybe_threads.h"
-
-namespace tcmalloc {
-
-#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
-// These following two functions are registered via pthread_atfork to make
-// sure the central_cache locks remain in a consisten state in the forked
-// version of the thread.
-
-void CentralCacheLockAll()
-{
-  Static::pageheap_lock()->Lock();
-  for (int i = 0; i < kNumClasses; ++i)
-    Static::central_cache()[i].Lock();
-}
-
-void CentralCacheUnlockAll()
-{
-  for (int i = 0; i < kNumClasses; ++i)
-    Static::central_cache()[i].Unlock();
-  Static::pageheap_lock()->Unlock();
-}
-#endif
-
-SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
-SizeMap Static::sizemap_;
-CentralFreeListPadded Static::central_cache_[kNumClasses];
-PageHeapAllocator<Span> Static::span_allocator_;
-PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
-Span Static::sampled_objects_;
-PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_;
-StackTrace* Static::growth_stacks_ = NULL;
-PageHeap* Static::pageheap_ = NULL;
-
-
-void Static::InitStaticVars() {
-  sizemap_.Init();
-  span_allocator_.Init();
-  span_allocator_.New(); // Reduce cache conflicts
-  span_allocator_.New(); // Reduce cache conflicts
-  stacktrace_allocator_.Init();
-  bucket_allocator_.Init();
-  // Do a bit of sanitizing: make sure central_cache is aligned properly
-  CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0);
-  for (int i = 0; i < kNumClasses; ++i) {
-    central_cache_[i].Init(i);
-  }
-
-  // It's important to have PageHeap allocated, not in static storage,
-  // so that HeapLeakChecker does not consider all the byte patterns stored
-  // in is caches as pointers that are sources of heap object liveness,
-  // which leads to it missing some memory leaks.
-  pageheap_ = new (MetaDataAlloc(sizeof(PageHeap))) PageHeap;
-
-  bool aggressive_decommit =
-    tcmalloc::commandlineflags::StringToBool(
-      TCMallocGetenvSafe("TCMALLOC_AGGRESSIVE_DECOMMIT"), true);
-
-  pageheap_->SetAggressiveDecommit(aggressive_decommit);
-
-  DLL_Init(&sampled_objects_);
-  Sampler::InitStatics();
-}
-
-
-#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) && !defined(__APPLE__)
-
-static inline
-void SetupAtForkLocksHandler()
-{
-  perftools_pthread_atfork(
-    CentralCacheLockAll,    // parent calls before fork
-    CentralCacheUnlockAll,  // parent calls after fork
-    CentralCacheUnlockAll); // child calls after fork
-}
-REGISTER_MODULE_INITIALIZER(tcmalloc_fork_handler, SetupAtForkLocksHandler());
-
-#endif
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/static_vars.h b/contrib/libtcmalloc/src/static_vars.h
deleted file mode 100644
index d6dfa334ab2..00000000000
--- a/contrib/libtcmalloc/src/static_vars.h
+++ /dev/null
@@ -1,115 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Ken Ashcraft <opensource@google.com>
-//
-// Static variables shared by multiple classes.
-
-#ifndef TCMALLOC_STATIC_VARS_H_
-#define TCMALLOC_STATIC_VARS_H_
-
-#include "config.h"
-#include "base/spinlock.h"
-#include "central_freelist.h"
-#include "common.h"
-#include "page_heap.h"
-#include "page_heap_allocator.h"
-#include "span.h"
-#include "stack_trace_table.h"
-
-namespace tcmalloc {
-
-class Static {
- public:
-  // Linker initialized, so this lock can be accessed at any time.
-  static SpinLock* pageheap_lock() { return &pageheap_lock_; }
-
-  // Must be called before calling any of the accessors below.
-  static void InitStaticVars();
-
-  // Central cache -- an array of free-lists, one per size-class.
-  // We have a separate lock per free-list to reduce contention.
-  static CentralFreeListPadded* central_cache() { return central_cache_; }
-
-  static SizeMap* sizemap() { return &sizemap_; }
-
-  //////////////////////////////////////////////////////////////////////
-  // In addition to the explicit initialization comment, the variables below
-  // must be protected by pageheap_lock.
-
-  // Page-level allocator.
-  static PageHeap* pageheap() { return pageheap_; }
-
-  static PageHeapAllocator<Span>* span_allocator() { return &span_allocator_; }
-
-  static PageHeapAllocator<StackTrace>* stacktrace_allocator() {
-    return &stacktrace_allocator_;
-  }
-
-  static StackTrace* growth_stacks() { return growth_stacks_; }
-  static void set_growth_stacks(StackTrace* s) { growth_stacks_ = s; }
-
-  // State kept for sampled allocations (/pprof/heap support)
-  static Span* sampled_objects() { return &sampled_objects_; }
-  static PageHeapAllocator<StackTraceTable::Bucket>* bucket_allocator() {
-    return &bucket_allocator_;
-  }
-
-  // Check if InitStaticVars() has been run.
-  static bool IsInited() { return pageheap() != NULL; }
-
- private:
-  static SpinLock pageheap_lock_;
-
-  // These static variables require explicit initialization.  We cannot
-  // count on their constructors to do any initialization because other
-  // static variables may try to allocate memory before these variables
-  // can run their constructors.
-
-  static SizeMap sizemap_;
-  static CentralFreeListPadded central_cache_[kNumClasses];
-  static PageHeapAllocator<Span> span_allocator_;
-  static PageHeapAllocator<StackTrace> stacktrace_allocator_;
-  static Span sampled_objects_;
-  static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_;
-
-  // Linked list of stack traces recorded every time we allocated memory
-  // from the system.  Useful for finding allocation sites that cause
-  // increase in the footprint of the system.  The linked list pointer
-  // is stored in trace->stack[kMaxStackDepth-1].
-  static StackTrace* growth_stacks_;
-
-  static PageHeap* pageheap_;
-};
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_STATIC_VARS_H_
diff --git a/contrib/libtcmalloc/src/symbolize.cc b/contrib/libtcmalloc/src/symbolize.cc
deleted file mode 100644
index a27106e8bce..00000000000
--- a/contrib/libtcmalloc/src/symbolize.cc
+++ /dev/null
@@ -1,285 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein
-//
-// This forks out to pprof to do the actual symbolizing.  We might
-// be better off writing our own in C++.
-
-#include "config.h"
-#include "symbolize.h"
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>   // for write()
-#endif
-#ifdef HAVE_SYS_SOCKET_H
-#include <sys/socket.h>   // for socketpair() -- needed by Symbolize
-#endif
-#ifdef HAVE_SYS_WAIT_H
-#include <sys/wait.h>   // for wait() -- needed by Symbolize
-#endif
-#ifdef HAVE_POLL_H
-#include <poll.h>
-#endif
-#ifdef __MACH__
-#include <mach-o/dyld.h>   // for GetProgramInvocationName()
-#include <limits.h>        // for PATH_MAX
-#endif
-#if defined(__CYGWIN__) || defined(__CYGWIN32__)
-#include <io.h>            // for get_osfhandle()
-#endif
-#include <string>
-#include "base/commandlineflags.h"
-#include "base/logging.h"
-#include "base/sysinfo.h"
-
-using std::string;
-using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h
-
-
-DEFINE_string(symbolize_pprof,
-              EnvToString("PPROF_PATH", "pprof"),
-              "Path to pprof to call for reporting function names.");
-
-// heap_profile_table_pprof may be referenced after destructors are
-// called (since that's when leak-checking is done), so we make
-// a more-permanent copy that won't ever get destroyed.
-static string* g_pprof_path = new string(FLAGS_symbolize_pprof);
-
-// Returns NULL if we're on an OS where we can't get the invocation name.
-// Using a static var is ok because we're not called from a thread.
-static const char* GetProgramInvocationName() {
-#if defined(HAVE_PROGRAM_INVOCATION_NAME)
-#ifdef __UCLIBC__
-  extern const char* program_invocation_name; // uclibc provides this
-#else
-  extern char* program_invocation_name;  // gcc provides this
-#endif
-  return program_invocation_name;
-#elif defined(__MACH__)
-  // We don't want to allocate memory for this since we may be
-  // calculating it when memory is corrupted.
-  static char program_invocation_name[PATH_MAX];
-  if (program_invocation_name[0] == '\0') {  // first time calculating
-    uint32_t length = sizeof(program_invocation_name);
-    if (_NSGetExecutablePath(program_invocation_name, &length))
-      return NULL;
-  }
-  return program_invocation_name;
-#else
-  return NULL;   // figure out a way to get argv[0]
-#endif
-}
-
-// Prints an error message when you can't run Symbolize().
-static void PrintError(const char* reason) {
-  RAW_LOG(ERROR,
-          "*** WARNING: Cannot convert addresses to symbols in output below.\n"
-          "*** Reason: %s\n"
-          "*** If you cannot fix this, try running pprof directly.\n",
-          reason);
-}
-
-void SymbolTable::Add(const void* addr) {
-  symbolization_table_[addr] = "";
-}
-
-const char* SymbolTable::GetSymbol(const void* addr) {
-  return symbolization_table_[addr];
-}
-
-// Updates symbolization_table with the pointers to symbol names corresponding
-// to its keys. The symbol names are stored in out, which is allocated and
-// freed by the caller of this routine.
-// Note that the forking/etc is not thread-safe or re-entrant.  That's
-// ok for the purpose we need -- reporting leaks detected by heap-checker
-// -- but be careful if you decide to use this routine for other purposes.
-// Returns number of symbols read on error.  If can't symbolize, returns 0
-// and emits an error message about why.
-int SymbolTable::Symbolize() {
-#if !defined(HAVE_UNISTD_H)  || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
-  PrintError("Perftools does not know how to call a sub-process on this O/S");
-  return 0;
-#else
-  const char* argv0 = GetProgramInvocationName();
-  if (argv0 == NULL) {  // can't call symbolize if we can't figure out our name
-    PrintError("Cannot figure out the name of this executable (argv0)");
-    return 0;
-  }
-  if (access(g_pprof_path->c_str(), R_OK) != 0) {
-    PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
-    return 0;
-  }
-
-  // All this work is to do two-way communication.  ugh.
-  int *child_in = NULL;   // file descriptors
-  int *child_out = NULL;  // for now, we don't worry about child_err
-  int child_fds[5][2];    // socketpair may be called up to five times below
-
-  // The client program may close its stdin and/or stdout and/or stderr
-  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
-  // In this case the communication between the forked processes may be broken
-  // if either the parent or the child tries to close or duplicate these
-  // descriptors. The loop below produces two pairs of file descriptors, each
-  // greater than 2 (stderr).
-  for (int i = 0; i < 5; i++) {
-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
-      for (int j = 0; j < i; j++) {
-        close(child_fds[j][0]);
-        close(child_fds[j][1]);
-        PrintError("Cannot create a socket pair");
-      }
-      return 0;
-    } else {
-      if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
-        if (child_in == NULL) {
-          child_in = child_fds[i];
-        } else {
-          child_out = child_fds[i];
-          for (int j = 0; j < i; j++) {
-            if (child_fds[j] == child_in) continue;
-            close(child_fds[j][0]);
-            close(child_fds[j][1]);
-          }
-          break;
-        }
-      }
-    }
-  }
-
-  switch (fork()) {
-    case -1: {  // error
-      close(child_in[0]);
-      close(child_in[1]);
-      close(child_out[0]);
-      close(child_out[1]);
-      PrintError("Unknown error calling fork()");
-      return 0;
-    }
-    case 0: {  // child
-      close(child_in[1]);   // child uses the 0's, parent uses the 1's
-      close(child_out[1]);  // child uses the 0's, parent uses the 1's
-      close(0);
-      close(1);
-      if (dup2(child_in[0], 0) == -1) _exit(1);
-      if (dup2(child_out[0], 1) == -1) _exit(2);
-      // Unset vars that might cause trouble when we fork
-      unsetenv("CPUPROFILE");
-      unsetenv("HEAPPROFILE");
-      unsetenv("HEAPCHECK");
-      unsetenv("PERFTOOLS_VERBOSE");
-      execlp(g_pprof_path->c_str(), g_pprof_path->c_str(),
-             "--symbols", argv0, NULL);
-      _exit(3);  // if execvp fails, it's bad news for us
-    }
-    default: {  // parent
-      close(child_in[0]);   // child uses the 0's, parent uses the 1's
-      close(child_out[0]);  // child uses the 0's, parent uses the 1's
-#ifdef HAVE_POLL_H
-      // Waiting for 1ms seems to give the OS time to notice any errors.
-      poll(0, 0, 1);
-      // For maximum safety, we check to make sure the execlp
-      // succeeded before trying to write.  (Otherwise we'll get a
-      // SIGPIPE.)  For systems without poll.h, we'll just skip this
-      // check, and trust that the user set PPROF_PATH correctly!
-      struct pollfd pfd = { child_in[1], POLLOUT, 0 };
-      if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
-          (pfd.revents & (POLLHUP|POLLERR))) {
-        PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
-        return 0;
-      }
-#endif
-#if defined(__CYGWIN__) || defined(__CYGWIN32__)
-      // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd.  Convert.
-      const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
-      DumpProcSelfMaps(symbols_handle);
-#else
-      DumpProcSelfMaps(child_in[1]);  // what pprof expects on stdin
-#endif
-
-      // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
-      // address to feed to pprof.
-      const int kOutBufSize = 24 * symbolization_table_.size();
-      char *pprof_buffer = new char[kOutBufSize];
-      int written = 0;
-      for (SymbolMap::const_iterator iter = symbolization_table_.begin();
-           iter != symbolization_table_.end(); ++iter) {
-        written += snprintf(pprof_buffer + written, kOutBufSize - written,
-                 // pprof expects format to be 0xXXXXXX
-                 "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
-      }
-      write(child_in[1], pprof_buffer, strlen(pprof_buffer));
-      close(child_in[1]);             // that's all we need to write
-
-      const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
-      int total_bytes_read = 0;
-      delete[] symbol_buffer_;
-      symbol_buffer_ = new char[kSymbolBufferSize];
-      memset(symbol_buffer_, '\0', kSymbolBufferSize);
-      while (1) {
-        int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
-                              kSymbolBufferSize - total_bytes_read);
-        if (bytes_read < 0) {
-          close(child_out[1]);
-          PrintError("Cannot read data from pprof");
-          return 0;
-        } else if (bytes_read == 0) {
-          close(child_out[1]);
-          wait(NULL);
-          break;
-        } else {
-          total_bytes_read += bytes_read;
-        }
-      }
-      // We have successfully read the output of pprof into out.  Make sure
-      // the last symbol is full (we can tell because it ends with a \n).
-      if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
-        return 0;
-      // make the symbolization_table_ values point to the output vector
-      SymbolMap::iterator fill = symbolization_table_.begin();
-      int num_symbols = 0;
-      const char *current_name = symbol_buffer_;
-      for (int i = 0; i < total_bytes_read; i++) {
-        if (symbol_buffer_[i] == '\n') {
-          fill->second = current_name;
-          symbol_buffer_[i] = '\0';
-          current_name = symbol_buffer_ + i + 1;
-          fill++;
-          num_symbols++;
-        }
-      }
-      return num_symbols;
-    }
-  }
-  PrintError("Unkown error (should never occur!)");
-  return 0;  // shouldn't be reachable
-#endif
-}
diff --git a/contrib/libtcmalloc/src/symbolize.h b/contrib/libtcmalloc/src/symbolize.h
deleted file mode 100644
index 728d073308a..00000000000
--- a/contrib/libtcmalloc/src/symbolize.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2009, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein
-
-#ifndef TCMALLOC_SYMBOLIZE_H_
-#define TCMALLOC_SYMBOLIZE_H_
-
-#include "config.h"
-#ifdef HAVE_STDINT_H
-#include <stdint.h>  // for uintptr_t
-#endif
-#include <stddef.h>  // for NULL
-#include <map>
-
-using std::map;
-
-// SymbolTable encapsulates the address operations necessary for stack trace
-// symbolization. A common use-case is to Add() the addresses from one or
-// several stack traces to a table, call Symbolize() once and use GetSymbol()
-// to get the symbol names for pretty-printing the stack traces.
-class SymbolTable {
- public:
-  SymbolTable()
-    : symbol_buffer_(NULL) {}
-  ~SymbolTable() {
-    delete[] symbol_buffer_;
-  }
-
-  // Adds an address to the table. This may overwrite a currently known symbol
-  // name, so Add() should not generally be called after Symbolize().
-  void Add(const void* addr);
-
-  // Returns the symbol name for addr, if the given address was added before
-  // the last successful call to Symbolize(). Otherwise may return an empty
-  // c-string.
-  const char* GetSymbol(const void* addr);
-
-  // Obtains the symbol names for the addresses stored in the table and returns
-  // the number of addresses actually symbolized.
-  int Symbolize();
-
- private:
-  typedef map<const void*, const char*> SymbolMap;
-
-  // An average size of memory allocated for a stack trace symbol.
-  static const int kSymbolSize = 1024;
-
-  // Map from addresses to symbol names.
-  SymbolMap symbolization_table_;
-
-  // Pointer to the buffer that stores the symbol names.
-  char *symbol_buffer_;
-};
-
-#endif  // TCMALLOC_SYMBOLIZE_H_
diff --git a/contrib/libtcmalloc/src/system-alloc.cc b/contrib/libtcmalloc/src/system-alloc.cc
deleted file mode 100644
index 1356513c1ce..00000000000
--- a/contrib/libtcmalloc/src/system-alloc.cc
+++ /dev/null
@@ -1,567 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-
-#include "config.h"
-#include <errno.h>                      // for EAGAIN, errno
-#include <fcntl.h>                      // for open, O_RDWR
-#include <stddef.h>                     // for size_t, NULL, ptrdiff_t
-#if defined HAVE_STDINT_H
-#include <stdint.h>                     // for uintptr_t, intptr_t
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-#ifdef HAVE_MMAP
-#include <sys/mman.h>                   // for munmap, mmap, MADV_DONTNEED, etc
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>                     // for sbrk, getpagesize, off_t
-#endif
-#include <new>                          // for operator new
-#include <gperftools/malloc_extension.h>
-#include "base/basictypes.h"
-#include "base/commandlineflags.h"
-#include "base/spinlock.h"              // for SpinLockHolder, SpinLock, etc
-#include "common.h"
-#include "internal_logging.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-
-// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
-// form of the name instead.
-#ifndef MAP_ANONYMOUS
-# define MAP_ANONYMOUS MAP_ANON
-#endif
-
-// Linux added support for MADV_FREE in 4.5 but we aren't ready to use it
-// yet. Among other things, using compile-time detection leads to poor
-// results when compiling on a system with MADV_FREE and running on a
-// system without it. See https://github.com/gperftools/gperftools/issues/780.
-#if defined(__linux__) && defined(MADV_FREE) && !defined(TCMALLOC_USE_MADV_FREE)
-# undef MADV_FREE
-#endif
-
-// MADV_FREE is specifically designed for use by malloc(), but only
-// FreeBSD supports it; in linux we fall back to the somewhat inferior
-// MADV_DONTNEED.
-#if !defined(MADV_FREE) && defined(MADV_DONTNEED)
-# define MADV_FREE  MADV_DONTNEED
-#endif
-
-// Solaris has a bug where it doesn't declare madvise() for C++.
-//    http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
-#if defined(__sun) && defined(__SVR4)
-# include <sys/types.h>    // for caddr_t
-  extern "C" { extern int madvise(caddr_t, size_t, int); }
-#endif
-
-// Set kDebugMode mode so that we can have use C++ conditionals
-// instead of preprocessor conditionals.
-#ifdef NDEBUG
-static const bool kDebugMode = false;
-#else
-static const bool kDebugMode = true;
-#endif
-
-// TODO(sanjay): Move the code below into the tcmalloc namespace
-using tcmalloc::kLog;
-using tcmalloc::Log;
-
-// Anonymous namespace to avoid name conflicts on "CheckAddressBits".
-namespace {
-
-// Check that no bit is set at position ADDRESS_BITS or higher.
-template <int ADDRESS_BITS> bool CheckAddressBits(uintptr_t ptr) {
-  return (ptr >> ADDRESS_BITS) == 0;
-}
-
-// Specialize for the bit width of a pointer to avoid undefined shift.
-template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) {
-  return true;
-}
-
-}  // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
-
-COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*),
-               address_bits_larger_than_pointer_size);
-
-static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
-
-#if defined(HAVE_MMAP) || defined(MADV_FREE)
-// Page size is initialized on demand (only needed for mmap-based allocators)
-static size_t pagesize = 0;
-#endif
-
-// The current system allocator
-SysAllocator* sys_alloc = NULL;
-
-// Number of bytes taken from system.
-size_t TCMalloc_SystemTaken = 0;
-
-// Configuration parameters.
-DEFINE_int32(malloc_devmem_start,
-             EnvToInt("TCMALLOC_DEVMEM_START", 0),
-             "Physical memory starting location in MB for /dev/mem allocation."
-             "  Setting this to 0 disables /dev/mem allocation");
-DEFINE_int32(malloc_devmem_limit,
-             EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0),
-             "Physical memory limit location in MB for /dev/mem allocation."
-             "  Setting this to 0 means no limit.");
-DEFINE_bool(malloc_skip_sbrk,
-            EnvToBool("TCMALLOC_SKIP_SBRK", false),
-            "Whether sbrk can be used to obtain memory.");
-DEFINE_bool(malloc_skip_mmap,
-            EnvToBool("TCMALLOC_SKIP_MMAP", false),
-            "Whether mmap can be used to obtain memory.");
-DEFINE_bool(malloc_disable_memory_release,
-            EnvToBool("TCMALLOC_DISABLE_MEMORY_RELEASE", false),
-            "Whether MADV_FREE/MADV_DONTNEED should be used"
-            " to return unused memory to the system.");
-
-// static allocators
-class SbrkSysAllocator : public SysAllocator {
-public:
-  SbrkSysAllocator() : SysAllocator() {
-  }
-  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
-};
-static union {
-  char buf[sizeof(SbrkSysAllocator)];
-  void *ptr;
-} sbrk_space;
-
-class MmapSysAllocator : public SysAllocator {
-public:
-  MmapSysAllocator() : SysAllocator() {
-  }
-  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
-};
-static union {
-  char buf[sizeof(MmapSysAllocator)];
-  void *ptr;
-} mmap_space;
-
-class DevMemSysAllocator : public SysAllocator {
-public:
-  DevMemSysAllocator() : SysAllocator() {
-  }
-  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
-};
-
-class DefaultSysAllocator : public SysAllocator {
- public:
-  DefaultSysAllocator() : SysAllocator() {
-    for (int i = 0; i < kMaxAllocators; i++) {
-      failed_[i] = true;
-      allocs_[i] = NULL;
-      names_[i] = NULL;
-    }
-  }
-  void SetChildAllocator(SysAllocator* alloc, unsigned int index,
-                         const char* name) {
-    if (index < kMaxAllocators && alloc != NULL) {
-      allocs_[index] = alloc;
-      failed_[index] = false;
-      names_[index] = name;
-    }
-  }
-  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
-
- private:
-  static const int kMaxAllocators = 2;
-  bool failed_[kMaxAllocators];
-  SysAllocator* allocs_[kMaxAllocators];
-  const char* names_[kMaxAllocators];
-};
-static union {
-  char buf[sizeof(DefaultSysAllocator)];
-  void *ptr;
-} default_space;
-static const char sbrk_name[] = "SbrkSysAllocator";
-static const char mmap_name[] = "MmapSysAllocator";
-
-
-void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
-                              size_t alignment) {
-#if !defined(HAVE_SBRK) || defined(__UCLIBC__)
-  return NULL;
-#else
-  // Check if we should use sbrk allocation.
-  // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
-  // state) and eventually gets initialized to the specified value.  Note
-  // that this code runs for a while before the flags are initialized.
-  // That means that even if this flag is set to true, some (initial)
-  // memory will be allocated with sbrk before the flag takes effect.
-  if (FLAGS_malloc_skip_sbrk) {
-    return NULL;
-  }
-
-  // sbrk will release memory if passed a negative number, so we do
-  // a strict check here
-  if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
-
-  // This doesn't overflow because TCMalloc_SystemAlloc has already
-  // tested for overflow at the alignment boundary.
-  size = ((size + alignment - 1) / alignment) * alignment;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
-  // Check that we we're not asking for so much more memory that we'd
-  // wrap around the end of the virtual address space.  (This seems
-  // like something sbrk() should check for us, and indeed opensolaris
-  // does, but glibc does not:
-  //    http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true
-  //    http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc
-  // Without this check, sbrk may succeed when it ought to fail.)
-  if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) {
-    return NULL;
-  }
-
-  void* result = sbrk(size);
-  if (result == reinterpret_cast<void*>(-1)) {
-    return NULL;
-  }
-
-  // Is it aligned?
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-  if ((ptr & (alignment-1)) == 0)  return result;
-
-  // Try to get more memory for alignment
-  size_t extra = alignment - (ptr & (alignment-1));
-  void* r2 = sbrk(extra);
-  if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) {
-    // Contiguous with previous result
-    return reinterpret_cast<void*>(ptr + extra);
-  }
-
-  // Give up and ask for "size + alignment - 1" bytes so
-  // that we can find an aligned region within it.
-  result = sbrk(size + alignment - 1);
-  if (result == reinterpret_cast<void*>(-1)) {
-    return NULL;
-  }
-  ptr = reinterpret_cast<uintptr_t>(result);
-  if ((ptr & (alignment-1)) != 0) {
-    ptr += alignment - (ptr & (alignment-1));
-  }
-  return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_SBRK
-}
-
-void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
-                              size_t alignment) {
-#ifndef HAVE_MMAP
-  return NULL;
-#else
-  // Check if we should use mmap allocation.
-  // FLAGS_malloc_skip_mmap starts out as false (its uninitialized
-  // state) and eventually gets initialized to the specified value.  Note
-  // that this code runs for a while before the flags are initialized.
-  // Chances are we never get here before the flags are initialized since
-  // sbrk is used until the heap is exhausted (before mmap is used).
-  if (FLAGS_malloc_skip_mmap) {
-    return NULL;
-  }
-
-  // Enforce page alignment
-  if (pagesize == 0) pagesize = getpagesize();
-  if (alignment < pagesize) alignment = pagesize;
-  size_t aligned_size = ((size + alignment - 1) / alignment) * alignment;
-  if (aligned_size < size) {
-    return NULL;
-  }
-  size = aligned_size;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
-  // Ask for extra memory if alignment > pagesize
-  size_t extra = 0;
-  if (alignment > pagesize) {
-    extra = alignment - pagesize;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void* result = mmap(NULL, size + extra,
-                      PROT_READ|PROT_WRITE,
-                      MAP_PRIVATE|MAP_ANONYMOUS,
-                      -1, 0);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    return NULL;
-  }
-
-  // Adjust the return memory so it is aligned
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-
-  // Return the unused memory to the system
-  if (adjust > 0) {
-    munmap(reinterpret_cast<void*>(ptr), adjust);
-  }
-  if (adjust < extra) {
-    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
-  }
-
-  ptr += adjust;
-  return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_MMAP
-}
-
-void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
-                                size_t alignment) {
-#ifndef HAVE_MMAP
-  return NULL;
-#else
-  static bool initialized = false;
-  static off_t physmem_base;  // next physical memory address to allocate
-  static off_t physmem_limit; // maximum physical address allowed
-  static int physmem_fd;      // file descriptor for /dev/mem
-
-  // Check if we should use /dev/mem allocation.  Note that it may take
-  // a while to get this flag initialized, so meanwhile we fall back to
-  // the next allocator.  (It looks like 7MB gets allocated before
-  // this flag gets initialized -khr.)
-  if (FLAGS_malloc_devmem_start == 0) {
-    // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to
-    // try us again next time.
-    return NULL;
-  }
-
-  if (!initialized) {
-    physmem_fd = open("/dev/mem", O_RDWR);
-    if (physmem_fd < 0) {
-      return NULL;
-    }
-    physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL;
-    physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL;
-    initialized = true;
-  }
-
-  // Enforce page alignment
-  if (pagesize == 0) pagesize = getpagesize();
-  if (alignment < pagesize) alignment = pagesize;
-  size_t aligned_size = ((size + alignment - 1) / alignment) * alignment;
-  if (aligned_size < size) {
-    return NULL;
-  }
-  size = aligned_size;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
-  // Ask for extra memory if alignment > pagesize
-  size_t extra = 0;
-  if (alignment > pagesize) {
-    extra = alignment - pagesize;
-  }
-
-  // check to see if we have any memory left
-  if (physmem_limit != 0 &&
-      ((size + extra) > (physmem_limit - physmem_base))) {
-    return NULL;
-  }
-
-  // Note: size + extra does not overflow since:
-  //            size + alignment < (1<<NBITS).
-  // and        extra <= alignment
-  // therefore  size + extra < (1<<NBITS)
-  void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
-                      MAP_SHARED, physmem_fd, physmem_base);
-  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
-    return NULL;
-  }
-  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
-
-  // Adjust the return memory so it is aligned
-  size_t adjust = 0;
-  if ((ptr & (alignment - 1)) != 0) {
-    adjust = alignment - (ptr & (alignment - 1));
-  }
-
-  // Return the unused virtual memory to the system
-  if (adjust > 0) {
-    munmap(reinterpret_cast<void*>(ptr), adjust);
-  }
-  if (adjust < extra) {
-    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
-  }
-
-  ptr += adjust;
-  physmem_base += adjust + size;
-
-  return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_MMAP
-}
-
-void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size,
-                                 size_t alignment) {
-  for (int i = 0; i < kMaxAllocators; i++) {
-    if (!failed_[i] && allocs_[i] != NULL) {
-      void* result = allocs_[i]->Alloc(size, actual_size, alignment);
-      if (result != NULL) {
-        return result;
-      }
-      failed_[i] = true;
-    }
-  }
-  // After both failed, reset "failed_" to false so that a single failed
-  // allocation won't make the allocator never work again.
-  for (int i = 0; i < kMaxAllocators; i++) {
-    failed_[i] = false;
-  }
-  return NULL;
-}
-
-ATTRIBUTE_WEAK ATTRIBUTE_NOINLINE
-SysAllocator *tc_get_sysalloc_override(SysAllocator *def)
-{
-  return def;
-}
-
-static bool system_alloc_inited = false;
-void InitSystemAllocators(void) {
-  MmapSysAllocator *mmap = new (mmap_space.buf) MmapSysAllocator();
-  SbrkSysAllocator *sbrk = new (sbrk_space.buf) SbrkSysAllocator();
-
-  // In 64-bit debug mode, place the mmap allocator first since it
-  // allocates pointers that do not fit in 32 bits and therefore gives
-  // us better testing of code's 64-bit correctness.  It also leads to
-  // less false negatives in heap-checking code.  (Numbers are less
-  // likely to look like pointers and therefore the conservative gc in
-  // the heap-checker is less likely to misinterpret a number as a
-  // pointer).
-  DefaultSysAllocator *sdef = new (default_space.buf) DefaultSysAllocator();
-  if (kDebugMode && sizeof(void*) > 4) {
-    sdef->SetChildAllocator(mmap, 0, mmap_name);
-    sdef->SetChildAllocator(sbrk, 1, sbrk_name);
-  } else {
-    sdef->SetChildAllocator(sbrk, 0, sbrk_name);
-    sdef->SetChildAllocator(mmap, 1, mmap_name);
-  }
-
-  sys_alloc = tc_get_sysalloc_override(sdef);
-}
-
-void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
-                           size_t alignment) {
-  // Discard requests that overflow
-  if (size + alignment < size) return NULL;
-
-  SpinLockHolder lock_holder(&spinlock);
-
-  if (!system_alloc_inited) {
-    InitSystemAllocators();
-    system_alloc_inited = true;
-  }
-
-  // Enforce minimum alignment
-  if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner);
-
-  size_t actual_size_storage;
-  if (actual_size == NULL) {
-    actual_size = &actual_size_storage;
-  }
-
-  void* result = sys_alloc->Alloc(size, actual_size, alignment);
-  if (result != NULL) {
-    CHECK_CONDITION(
-      CheckAddressBits<kAddressBits>(
-        reinterpret_cast<uintptr_t>(result) + *actual_size - 1));
-    TCMalloc_SystemTaken += *actual_size;
-  }
-  return result;
-}
-
-bool TCMalloc_SystemRelease(void* start, size_t length) {
-#ifdef MADV_FREE
-  if (FLAGS_malloc_devmem_start) {
-    // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
-    // mapping /dev/mem for heap memory.
-    return false;
-  }
-  if (FLAGS_malloc_disable_memory_release) return false;
-  if (pagesize == 0) pagesize = getpagesize();
-  const size_t pagemask = pagesize - 1;
-
-  size_t new_start = reinterpret_cast<size_t>(start);
-  size_t end = new_start + length;
-  size_t new_end = end;
-
-  // Round up the starting address and round down the ending address
-  // to be page aligned:
-  new_start = (new_start + pagesize - 1) & ~pagemask;
-  new_end = new_end & ~pagemask;
-
-  ASSERT((new_start & pagemask) == 0);
-  ASSERT((new_end & pagemask) == 0);
-  ASSERT(new_start >= reinterpret_cast<size_t>(start));
-  ASSERT(new_end <= end);
-
-  if (new_end > new_start) {
-    int result;
-    do {
-      result = madvise(reinterpret_cast<char*>(new_start),
-          new_end - new_start, MADV_FREE);
-    } while (result == -1 && errno == EAGAIN);
-
-    return result != -1;
-  }
-#endif
-  return false;
-}
-
-void TCMalloc_SystemCommit(void* start, size_t length) {
-  // Nothing to do here.  TCMalloc_SystemRelease does not alter pages
-  // such that they need to be re-committed before they can be used by the
-  // application.
-}
-
-#pragma GCC diagnostic pop
diff --git a/contrib/libtcmalloc/src/system-alloc.h b/contrib/libtcmalloc/src/system-alloc.h
deleted file mode 100644
index 2c06c183d63..00000000000
--- a/contrib/libtcmalloc/src/system-alloc.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// 
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat
-//
-// Routine that uses sbrk/mmap to allocate memory from the system.
-// Useful for implementing malloc.
-
-#ifndef TCMALLOC_SYSTEM_ALLOC_H_
-#define TCMALLOC_SYSTEM_ALLOC_H_
-
-#include "config.h"
-#include <stddef.h>                     // for size_t
-
-class SysAllocator;
-
-// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment
-//
-// Allocate and return "N" bytes of zeroed memory.
-//
-// If actual_bytes is NULL then the returned memory is exactly the
-// requested size.  If actual bytes is non-NULL then the allocator
-// may optionally return more bytes than asked for (i.e. return an
-// entire "huge" page if a huge page allocator is in use).
-//
-// The returned pointer is a multiple of "alignment" if non-zero. The
-// returned pointer will always be aligned suitably for holding a
-// void*, double, or size_t. In addition, if this platform defines
-// CACHELINE_ALIGNED, the return pointer will always be cacheline
-// aligned.
-//
-// Returns NULL when out of memory.
-extern PERFTOOLS_DLL_DECL
-void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
-			   size_t alignment = 0);
-
-// This call is a hint to the operating system that the pages
-// contained in the specified range of memory will not be used for a
-// while, and can be released for use by other processes or the OS.
-// Pages which are released in this way may be destroyed (zeroed) by
-// the OS.  The benefit of this function is that it frees memory for
-// use by the system, the cost is that the pages are faulted back into
-// the address space next time they are touched, which can impact
-// performance.  (Only pages fully covered by the memory region will
-// be released, partial pages will not.)
-//
-// Returns false if release failed or not supported.
-extern PERFTOOLS_DLL_DECL
-bool TCMalloc_SystemRelease(void* start, size_t length);
-
-// Called to ressurect memory which has been previously released
-// to the system via TCMalloc_SystemRelease.  An attempt to
-// commit a page that is already committed does not cause this
-// function to fail.
-extern PERFTOOLS_DLL_DECL
-void TCMalloc_SystemCommit(void* start, size_t length);
-
-// The current system allocator.
-extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc;
-
-// Number of bytes taken from system.
-extern PERFTOOLS_DLL_DECL size_t TCMalloc_SystemTaken;
-
-#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */
diff --git a/contrib/libtcmalloc/src/tcmalloc.cc b/contrib/libtcmalloc/src/tcmalloc.cc
deleted file mode 100644
index b52524b1361..00000000000
--- a/contrib/libtcmalloc/src/tcmalloc.cc
+++ /dev/null
@@ -1,1842 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// A malloc that uses a per-thread cache to satisfy small malloc requests.
-// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
-//
-// See doc/tcmalloc.html for a high-level
-// description of how this malloc works.
-//
-// SYNCHRONIZATION
-//  1. The thread-specific lists are accessed without acquiring any locks.
-//     This is safe because each such list is only accessed by one thread.
-//  2. We have a lock per central free-list, and hold it while manipulating
-//     the central free list for a particular size.
-//  3. The central page allocator is protected by "pageheap_lock".
-//  4. The pagemap (which maps from page-number to descriptor),
-//     can be read without holding any locks, and written while holding
-//     the "pageheap_lock".
-//  5. To improve performance, a subset of the information one can get
-//     from the pagemap is cached in a data structure, pagemap_cache_,
-//     that atomically reads and writes its entries.  This cache can be
-//     read and written without locking.
-//
-//     This multi-threaded access to the pagemap is safe for fairly
-//     subtle reasons.  We basically assume that when an object X is
-//     allocated by thread A and deallocated by thread B, there must
-//     have been appropriate synchronization in the handoff of object
-//     X from thread A to thread B.  The same logic applies to pagemap_cache_.
-//
-// THE PAGEID-TO-SIZECLASS CACHE
-// Hot PageID-to-sizeclass mappings are held by pagemap_cache_.  If this cache
-// returns 0 for a particular PageID then that means "no information," not that
-// the sizeclass is 0.  The cache may have stale information for pages that do
-// not hold the beginning of any free()'able object.  Staleness is eliminated
-// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
-// do_memalign() for all other relevant pages.
-//
-// PAGEMAP
-// -------
-// Page map contains a mapping from page id to Span.
-//
-// If Span s occupies pages [p..q],
-//      pagemap[p] == s
-//      pagemap[q] == s
-//      pagemap[p+1..q-1] are undefined
-//      pagemap[p-1] and pagemap[q+1] are defined:
-//         NULL if the corresponding page is not yet in the address space.
-//         Otherwise it points to a Span.  This span may be free
-//         or allocated.  If free, it is in one of pageheap's freelist.
-//
-// TODO: Bias reclamation to larger addresses
-// TODO: implement mallinfo/mallopt
-// TODO: Better testing
-//
-// 9/28/2003 (new page-level allocator replaces ptmalloc2):
-// * malloc/free of small objects goes from ~300 ns to ~50 ns.
-// * allocation of a reasonably complicated struct
-//   goes from about 1100 ns to about 300 ns.
-
-#include "config.h"
-#include <gperftools/tcmalloc.h>
-
-#include <errno.h>                      // for ENOMEM, EINVAL, errno
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include <sys/types.h>
-#endif
-#include <stddef.h>                     // for size_t, NULL
-#include <stdlib.h>                     // for getenv
-#include <string.h>                     // for strcmp, memset, strlen, etc
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>                     // for getpagesize, write, etc
-#endif
-#include <algorithm>                    // for max, min
-#include <limits>                       // for numeric_limits
-#include <new>                          // for nothrow_t (ptr only), etc
-#include <vector>                       // for vector
-
-#include <gperftools/malloc_extension.h>
-#include <gperftools/malloc_hook.h>         // for MallocHook
-#include "base/basictypes.h"            // for int64
-#include "base/commandlineflags.h"      // for RegisterFlagValidator, etc
-#include "base/dynamic_annotations.h"   // for RunningOnValgrind
-#include "base/spinlock.h"              // for SpinLockHolder
-#include "central_freelist.h"  // for CentralFreeListPadded
-#include "common.h"            // for StackTrace, kPageShift, etc
-#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
-#include "linked_list.h"       // for SLL_SetNext
-#include "malloc_hook-inl.h"       // for MallocHook::InvokeNewHook, etc
-#include "page_heap.h"         // for PageHeap, PageHeap::Stats
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "span.h"              // for Span, DLL_Prepend, etc
-#include "stack_trace_table.h"  // for StackTraceTable
-#include "static_vars.h"       // for Static
-#include "system-alloc.h"      // for DumpSystemAllocatorStats, etc
-#include "tcmalloc_guard.h"    // for TCMallocGuard
-#include "thread_cache.h"      // for ThreadCache
-
-#ifdef __clang__
-// clang's apparent focus on code size somehow causes it to ignore
-// normal inline directives even for few functions which inlining is
-// key for performance. In order to get performance of clang's
-// generated code closer to normal, we're forcing inlining via
-// attribute.
-#define ALWAYS_INLINE inline __attribute__((always_inline))
-#else
-#define ALWAYS_INLINE inline
-#endif
-
-#include "maybe_emergency_malloc.h"
-
-#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
-# define WIN32_DO_PATCHING 1
-#endif
-
-// Some windows file somewhere (at least on cygwin) #define's small (!)
-#undef small
-
-using STL_NAMESPACE::max;
-using STL_NAMESPACE::numeric_limits;
-using STL_NAMESPACE::vector;
-
-#include "libc_override.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-
-using tcmalloc::AlignmentForSize;
-using tcmalloc::kLog;
-using tcmalloc::kCrash;
-using tcmalloc::kCrashWithStats;
-using tcmalloc::Log;
-using tcmalloc::PageHeap;
-using tcmalloc::PageHeapAllocator;
-using tcmalloc::SizeMap;
-using tcmalloc::Span;
-using tcmalloc::StackTrace;
-using tcmalloc::Static;
-using tcmalloc::ThreadCache;
-
-DECLARE_double(tcmalloc_release_rate);
-
-// For windows, the printf we use to report large allocs is
-// potentially dangerous: it could cause a malloc that would cause an
-// infinite loop.  So by default we set the threshold to a huge number
-// on windows, so this bad situation will never trigger.  You can
-// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you
-// want this functionality.
-#ifdef _WIN32
-const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62;
-#else
-const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30;
-#endif
-DEFINE_int64(tcmalloc_large_alloc_report_threshold,
-             EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD",
-                        kDefaultLargeAllocReportThreshold),
-             "Allocations larger than this value cause a stack "
-             "trace to be dumped to stderr.  The threshold for "
-             "dumping stack traces is increased by a factor of 1.125 "
-             "every time we print a message so that the threshold "
-             "automatically goes up by a factor of ~1000 every 60 "
-             "messages.  This bounds the amount of extra logging "
-             "generated by this flag.  Default value of this flag "
-             "is very large and therefore you should see no extra "
-             "logging unless the flag is overridden.  Set to 0 to "
-             "disable reporting entirely.");
-
-
-// We already declared these functions in tcmalloc.h, but we have to
-// declare them again to give them an ATTRIBUTE_SECTION: we want to
-// put all callers of MallocHook::Invoke* in this module into
-// ATTRIBUTE_SECTION(google_malloc) section, so that
-// MallocHook::GetCallerStackTrace can function accurately.
-#ifndef _WIN32   // windows doesn't have attribute_section, so don't bother
-extern "C" {
-  void* tc_malloc(size_t size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void tc_free(void* ptr) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void tc_cfree(void* ptr) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-
-  void* tc_memalign(size_t __alignment, size_t __size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  int tc_posix_memalign(void** ptr, size_t align, size_t size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_valloc(size_t __size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_pvalloc(size_t __size) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-
-  void tc_malloc_stats(void) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  int tc_mallopt(int cmd, int value) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-#ifdef HAVE_STRUCT_MALLINFO
-  struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-#endif
-
-  void* tc_new(size_t size)
-      ATTRIBUTE_SECTION(google_malloc);
-  void tc_delete(void* p) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_newarray(size_t size)
-      ATTRIBUTE_SECTION(google_malloc);
-  void tc_deletearray(void* p) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-
-  // And the nothrow variants of these:
-  void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  // Surprisingly, standard C++ library implementations use a
-  // nothrow-delete internally.  See, eg:
-  // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
-  void tc_delete_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-  void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-
-  // Some non-standard extensions that we support.
-
-  // This is equivalent to
-  //    OS X: malloc_size()
-  //    glibc: malloc_usable_size()
-  //    Windows: _msize()
-  size_t tc_malloc_size(void* p) PERFTOOLS_THROW
-      ATTRIBUTE_SECTION(google_malloc);
-}  // extern "C"
-#endif  // #ifndef _WIN32
-
-// ----------------------- IMPLEMENTATION -------------------------------
-
-static int tc_new_mode = 0;  // See tc_set_new_mode().
-
-// Routines such as free() and realloc() catch some erroneous pointers
-// passed to them, and invoke the below when they do.  (An erroneous pointer
-// won't be caught if it's within a valid span or a stale span for which
-// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing
-// required) kind of exception handling for these routines.
-namespace {
-void InvalidFree(void* ptr) {
-  if (tcmalloc::IsEmergencyPtr(ptr)) {
-    tcmalloc::EmergencyFree(ptr);
-    return;
-  }
-  Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr);
-}
-
-size_t InvalidGetSizeForRealloc(const void* old_ptr) {
-  Log(kCrash, __FILE__, __LINE__,
-      "Attempt to realloc invalid pointer", old_ptr);
-  return 0;
-}
-
-size_t InvalidGetAllocatedSize(const void* ptr) {
-  Log(kCrash, __FILE__, __LINE__,
-      "Attempt to get the size of an invalid pointer", ptr);
-  return 0;
-}
-}  // unnamed namespace
-
-// Extract interesting stats
-struct TCMallocStats {
-  uint64_t thread_bytes;      // Bytes in thread caches
-  uint64_t central_bytes;     // Bytes in central cache
-  uint64_t transfer_bytes;    // Bytes in central transfer cache
-  uint64_t metadata_bytes;    // Bytes alloced for metadata
-  PageHeap::Stats pageheap;   // Stats from page heap
-};
-
-// Get stats into "r".  Also, if class_count != NULL, class_count[k]
-// will be set to the total number of objects of size class k in the
-// central cache, transfer cache, and per-thread caches. If small_spans
-// is non-NULL, it is filled.  Same for large_spans.
-static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
-                         PageHeap::SmallSpanStats* small_spans,
-                         PageHeap::LargeSpanStats* large_spans) {
-  r->central_bytes = 0;
-  r->transfer_bytes = 0;
-  for (int cl = 0; cl < kNumClasses; ++cl) {
-    const int length = Static::central_cache()[cl].length();
-    const int tc_length = Static::central_cache()[cl].tc_length();
-    const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes();
-    const size_t size = static_cast<uint64_t>(
-        Static::sizemap()->ByteSizeForClass(cl));
-    r->central_bytes += (size * length) + cache_overhead;
-    r->transfer_bytes += (size * tc_length);
-    if (class_count) {
-      // Sum the lengths of all per-class freelists, except the per-thread
-      // freelists, which get counted when we call GetThreadStats(), below.
-      class_count[cl] = length + tc_length;
-    }
-
-  }
-
-  // Add stats from per-thread heaps
-  r->thread_bytes = 0;
-  { // scope
-    SpinLockHolder h(Static::pageheap_lock());
-    ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
-    r->metadata_bytes = tcmalloc::metadata_system_bytes();
-    r->pageheap = Static::pageheap()->stats();
-    if (small_spans != NULL) {
-      Static::pageheap()->GetSmallSpanStats(small_spans);
-    }
-    if (large_spans != NULL) {
-      Static::pageheap()->GetLargeSpanStats(large_spans);
-    }
-  }
-}
-
-static double PagesToMiB(uint64_t pages) {
-  return (pages << kPageShift) / 1048576.0;
-}
-
-// WRITE stats to "out"
-static void DumpStats(TCMalloc_Printer* out, int level) {
-  TCMallocStats stats;
-  uint64_t class_count[kNumClasses];
-  PageHeap::SmallSpanStats small;
-  PageHeap::LargeSpanStats large;
-  if (level >= 2) {
-    ExtractStats(&stats, class_count, &small, &large);
-  } else {
-    ExtractStats(&stats, NULL, NULL, NULL);
-  }
-
-  static const double MiB = 1048576.0;
-
-  const uint64_t virtual_memory_used = (stats.pageheap.system_bytes
-                                        + stats.metadata_bytes);
-  const uint64_t physical_memory_used = (virtual_memory_used
-                                         - stats.pageheap.unmapped_bytes);
-  const uint64_t bytes_in_use_by_app = (physical_memory_used
-                                        - stats.metadata_bytes
-                                        - stats.pageheap.free_bytes
-                                        - stats.central_bytes
-                                        - stats.transfer_bytes
-                                        - stats.thread_bytes);
-
-#ifdef TCMALLOC_SMALL_BUT_SLOW
-  out->printf(
-      "NOTE:  SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
-#endif
-  out->printf(
-      "------------------------------------------------\n"
-      "MALLOC:   %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n"
-      "MALLOC:   ------------\n"
-      "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n"
-      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n"
-      "MALLOC:   ------------\n"
-      "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n"
-      "MALLOC:\n"
-      "MALLOC:   %12" PRIu64 "              Spans in use\n"
-      "MALLOC:   %12" PRIu64 "              Thread heaps in use\n"
-      "MALLOC:   %12" PRIu64 "              Tcmalloc page size\n"
-      "------------------------------------------------\n"
-      "Call ReleaseFreeMemory() to release freelist memory to the OS"
-      " (via madvise()).\n"
-      "Bytes released to the OS take up virtual address space"
-      " but no physical memory.\n",
-      bytes_in_use_by_app, bytes_in_use_by_app / MiB,
-      stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
-      stats.central_bytes, stats.central_bytes / MiB,
-      stats.transfer_bytes, stats.transfer_bytes / MiB,
-      stats.thread_bytes, stats.thread_bytes / MiB,
-      stats.metadata_bytes, stats.metadata_bytes / MiB,
-      physical_memory_used, physical_memory_used / MiB,
-      stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB,
-      virtual_memory_used, virtual_memory_used / MiB,
-      uint64_t(Static::span_allocator()->inuse()),
-      uint64_t(ThreadCache::HeapsInUse()),
-      uint64_t(kPageSize));
-
-  if (level >= 2) {
-    out->printf("------------------------------------------------\n");
-    out->printf("Total size of freelists for per-thread caches,\n");
-    out->printf("transfer cache, and central cache, by size class\n");
-    out->printf("------------------------------------------------\n");
-    uint64_t cumulative = 0;
-    for (int cl = 0; cl < kNumClasses; ++cl) {
-      if (class_count[cl] > 0) {
-        uint64_t class_bytes =
-            class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
-        cumulative += class_bytes;
-        out->printf("class %3d [ %8" PRIuS " bytes ] : "
-                "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n",
-                cl, Static::sizemap()->ByteSizeForClass(cl),
-                class_count[cl],
-                class_bytes / MiB,
-                cumulative / MiB);
-      }
-    }
-
-    // append page heap info
-    int nonempty_sizes = 0;
-    for (int s = 0; s < kMaxPages; s++) {
-      if (small.normal_length[s] + small.returned_length[s] > 0) {
-        nonempty_sizes++;
-      }
-    }
-    out->printf("------------------------------------------------\n");
-    out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n",
-                nonempty_sizes, stats.pageheap.free_bytes / MiB,
-                stats.pageheap.unmapped_bytes / MiB);
-    out->printf("------------------------------------------------\n");
-    uint64_t total_normal = 0;
-    uint64_t total_returned = 0;
-    for (int s = 0; s < kMaxPages; s++) {
-      const int n_length = small.normal_length[s];
-      const int r_length = small.returned_length[s];
-      if (n_length + r_length > 0) {
-        uint64_t n_pages = s * n_length;
-        uint64_t r_pages = s * r_length;
-        total_normal += n_pages;
-        total_returned += r_pages;
-        out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
-                    "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
-                    s,
-                    (n_length + r_length),
-                    PagesToMiB(n_pages + r_pages),
-                    PagesToMiB(total_normal + total_returned),
-                    PagesToMiB(r_pages),
-                    PagesToMiB(total_returned));
-      }
-    }
-
-    total_normal += large.normal_pages;
-    total_returned += large.returned_pages;
-    out->printf(">255   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
-                "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
-                static_cast<unsigned int>(large.spans),
-                PagesToMiB(large.normal_pages + large.returned_pages),
-                PagesToMiB(total_normal + total_returned),
-                PagesToMiB(large.returned_pages),
-                PagesToMiB(total_returned));
-  }
-}
-
-static void PrintStats(int level) {
-  const int kBufferSize = 16 << 10;
-  char* buffer = new char[kBufferSize];
-  TCMalloc_Printer printer(buffer, kBufferSize);
-  DumpStats(&printer, level);
-  write(STDERR_FILENO, buffer, strlen(buffer));
-  delete[] buffer;
-}
-
-static void** DumpHeapGrowthStackTraces() {
-  // Count how much space we need
-  int needed_slots = 0;
-  {
-    SpinLockHolder h(Static::pageheap_lock());
-    for (StackTrace* t = Static::growth_stacks();
-         t != NULL;
-         t = reinterpret_cast<StackTrace*>(
-             t->stack[tcmalloc::kMaxStackDepth-1])) {
-      needed_slots += 3 + t->depth;
-    }
-    needed_slots += 100;            // Slop in case list grows
-    needed_slots += needed_slots/8; // An extra 12.5% slop
-  }
-
-  void** result = new void*[needed_slots];
-  if (result == NULL) {
-    Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: allocation failed for stack trace slots",
-        needed_slots * sizeof(*result));
-    return NULL;
-  }
-
-  SpinLockHolder h(Static::pageheap_lock());
-  int used_slots = 0;
-  for (StackTrace* t = Static::growth_stacks();
-       t != NULL;
-       t = reinterpret_cast<StackTrace*>(
-           t->stack[tcmalloc::kMaxStackDepth-1])) {
-    ASSERT(used_slots < needed_slots);  // Need to leave room for terminator
-    if (used_slots + 3 + t->depth >= needed_slots) {
-      // No more room
-      break;
-    }
-
-    result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
-    result[used_slots+1] = reinterpret_cast<void*>(t->size);
-    result[used_slots+2] = reinterpret_cast<void*>(t->depth);
-    for (int d = 0; d < t->depth; d++) {
-      result[used_slots+3+d] = t->stack[d];
-    }
-    used_slots += 3 + t->depth;
-  }
-  result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
-  return result;
-}
-
-static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) {
-  PageID page = 1;  // Some code may assume that page==0 is never used
-  bool done = false;
-  while (!done) {
-    // Accumulate a small number of ranges in a local buffer
-    static const int kNumRanges = 16;
-    static base::MallocRange ranges[kNumRanges];
-    int n = 0;
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      while (n < kNumRanges) {
-        if (!Static::pageheap()->GetNextRange(page, &ranges[n])) {
-          done = true;
-          break;
-        } else {
-          uintptr_t limit = ranges[n].address + ranges[n].length;
-          page = (limit + kPageSize - 1) >> kPageShift;
-          n++;
-        }
-      }
-    }
-
-    for (int i = 0; i < n; i++) {
-      (*func)(arg, &ranges[i]);
-    }
-  }
-}
-
-// TCMalloc's support for extra malloc interfaces
-class TCMallocImplementation : public MallocExtension {
- private:
-  // ReleaseToSystem() might release more than the requested bytes because
-  // the page heap releases at the span granularity, and spans are of wildly
-  // different sizes.  This member keeps track of the extra bytes bytes
-  // released so that the app can periodically call ReleaseToSystem() to
-  // release memory at a constant rate.
-  // NOTE: Protected by Static::pageheap_lock().
-  size_t extra_bytes_released_;
-
- public:
-  TCMallocImplementation()
-      : extra_bytes_released_(0) {
-  }
-
-  virtual void GetStats(char* buffer, int buffer_length) {
-    ASSERT(buffer_length > 0);
-    TCMalloc_Printer printer(buffer, buffer_length);
-
-    // Print level one stats unless lots of space is available
-    if (buffer_length < 10000) {
-      DumpStats(&printer, 1);
-    } else {
-      DumpStats(&printer, 2);
-    }
-  }
-
-  // We may print an extra, tcmalloc-specific warning message here.
-  virtual void GetHeapSample(MallocExtensionWriter* writer) {
-    if (FLAGS_tcmalloc_sample_parameter == 0) {
-      const char* const kWarningMsg =
-          "%warn\n"
-          "%warn This heap profile does not have any data in it, because\n"
-          "%warn the application was run with heap sampling turned off.\n"
-          "%warn To get useful data from GetHeapSample(), you must\n"
-          "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n"
-          "%warn a positive sampling period, such as 524288.\n"
-          "%warn\n";
-      writer->append(kWarningMsg, strlen(kWarningMsg));
-    }
-    MallocExtension::GetHeapSample(writer);
-  }
-
-  virtual void** ReadStackTraces(int* sample_period) {
-    tcmalloc::StackTraceTable table;
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      Span* sampled = Static::sampled_objects();
-      for (Span* s = sampled->next; s != sampled; s = s->next) {
-        table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects));
-      }
-    }
-    *sample_period = ThreadCache::GetCache()->GetSamplePeriod();
-    return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock
-  }
-
-  virtual void** ReadHeapGrowthStackTraces() {
-    return DumpHeapGrowthStackTraces();
-  }
-
-  virtual size_t GetThreadCacheSize() {
-    ThreadCache* tc = ThreadCache::GetCacheIfPresent();
-    if (!tc)
-      return 0;
-    return tc->Size();
-  }
-
-  virtual void MarkThreadTemporarilyIdle() {
-    ThreadCache::BecomeTemporarilyIdle();
-  }
-
-  virtual void Ranges(void* arg, RangeFunction func) {
-    IterateOverRanges(arg, func);
-  }
-
-  virtual bool GetNumericProperty(const char* name, size_t* value) {
-    ASSERT(name != NULL);
-
-    if (strcmp(name, "generic.current_allocated_bytes") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.pageheap.system_bytes
-               - stats.thread_bytes
-               - stats.central_bytes
-               - stats.transfer_bytes
-               - stats.pageheap.free_bytes
-               - stats.pageheap.unmapped_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "generic.heap_size") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.pageheap.system_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
-      // Kept for backwards compatibility.  Now defined externally as:
-      //    pageheap_free_bytes + pageheap_unmapped_bytes.
-      SpinLockHolder l(Static::pageheap_lock());
-      PageHeap::Stats stats = Static::pageheap()->stats();
-      *value = stats.free_bytes + stats.unmapped_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.central_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.transfer_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.thread_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
-      SpinLockHolder l(Static::pageheap_lock());
-      *value = Static::pageheap()->stats().free_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) {
-      SpinLockHolder l(Static::pageheap_lock());
-      *value = Static::pageheap()->stats().unmapped_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
-      SpinLockHolder l(Static::pageheap_lock());
-      *value = ThreadCache::overall_thread_cache_size();
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
-      TCMallocStats stats;
-      ExtractStats(&stats, NULL, NULL, NULL);
-      *value = stats.thread_bytes;
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) {
-      *value = size_t(Static::pageheap()->GetAggressiveDecommit());
-      return true;
-    }
-
-    return false;
-  }
-
-  virtual bool SetNumericProperty(const char* name, size_t value) {
-    ASSERT(name != NULL);
-
-    if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
-      SpinLockHolder l(Static::pageheap_lock());
-      ThreadCache::set_overall_thread_cache_size(value);
-      return true;
-    }
-
-    if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) {
-      Static::pageheap()->SetAggressiveDecommit(value != 0);
-      return true;
-    }
-
-    return false;
-  }
-
-  virtual void MarkThreadIdle() {
-    ThreadCache::BecomeIdle();
-  }
-
-  virtual void MarkThreadBusy();  // Implemented below
-
-  virtual SysAllocator* GetSystemAllocator() {
-    SpinLockHolder h(Static::pageheap_lock());
-    return sys_alloc;
-  }
-
-  virtual void SetSystemAllocator(SysAllocator* alloc) {
-    SpinLockHolder h(Static::pageheap_lock());
-    sys_alloc = alloc;
-  }
-
-  virtual void ReleaseToSystem(size_t num_bytes) {
-    SpinLockHolder h(Static::pageheap_lock());
-    if (num_bytes <= extra_bytes_released_) {
-      // We released too much on a prior call, so don't release any
-      // more this time.
-      extra_bytes_released_ = extra_bytes_released_ - num_bytes;
-      return;
-    }
-    num_bytes = num_bytes - extra_bytes_released_;
-    // num_bytes might be less than one page.  If we pass zero to
-    // ReleaseAtLeastNPages, it won't do anything, so we release a whole
-    // page now and let extra_bytes_released_ smooth it out over time.
-    Length num_pages = max<Length>(num_bytes >> kPageShift, 1);
-    size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages(
-        num_pages) << kPageShift;
-    if (bytes_released > num_bytes) {
-      extra_bytes_released_ = bytes_released - num_bytes;
-    } else {
-      // The PageHeap wasn't able to release num_bytes.  Don't try to
-      // compensate with a big release next time.  Specifically,
-      // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX).
-      extra_bytes_released_ = 0;
-    }
-  }
-
-  virtual void SetMemoryReleaseRate(double rate) {
-    FLAGS_tcmalloc_release_rate = rate;
-  }
-
-  virtual double GetMemoryReleaseRate() {
-    return FLAGS_tcmalloc_release_rate;
-  }
-  virtual size_t GetEstimatedAllocatedSize(size_t size) {
-    if (size <= kMaxSize) {
-      const size_t cl = Static::sizemap()->SizeClass(size);
-      const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
-      return alloc_size;
-    } else {
-      return tcmalloc::pages(size) << kPageShift;
-    }
-  }
-
-  // This just calls GetSizeWithCallback, but because that's in an
-  // unnamed namespace, we need to move the definition below it in the
-  // file.
-  virtual size_t GetAllocatedSize(const void* ptr);
-
-  // This duplicates some of the logic in GetSizeWithCallback, but is
-  // faster.  This is important on OS X, where this function is called
-  // on every allocation operation.
-  virtual Ownership GetOwnership(const void* ptr) {
-    const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-    // The rest of tcmalloc assumes that all allocated pointers use at
-    // most kAddressBits bits.  If ptr doesn't, then it definitely
-    // wasn't alloacted by tcmalloc.
-    if ((p >> (kAddressBits - kPageShift)) > 0) {
-      return kNotOwned;
-    }
-    size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
-    if (cl != 0) {
-      return kOwned;
-    }
-    const Span *span = Static::pageheap()->GetDescriptor(p);
-    return span ? kOwned : kNotOwned;
-  }
-
-  virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
-    static const char* kCentralCacheType = "tcmalloc.central";
-    static const char* kTransferCacheType = "tcmalloc.transfer";
-    static const char* kThreadCacheType = "tcmalloc.thread";
-    static const char* kPageHeapType = "tcmalloc.page";
-    static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped";
-    static const char* kLargeSpanType = "tcmalloc.large";
-    static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped";
-
-    v->clear();
-
-    // central class information
-    int64 prev_class_size = 0;
-    for (int cl = 1; cl < kNumClasses; ++cl) {
-      size_t class_size = Static::sizemap()->ByteSizeForClass(cl);
-      MallocExtension::FreeListInfo i;
-      i.min_object_size = prev_class_size + 1;
-      i.max_object_size = class_size;
-      i.total_bytes_free =
-          Static::central_cache()[cl].length() * class_size;
-      i.type = kCentralCacheType;
-      v->push_back(i);
-
-      // transfer cache
-      i.total_bytes_free =
-          Static::central_cache()[cl].tc_length() * class_size;
-      i.type = kTransferCacheType;
-      v->push_back(i);
-
-      prev_class_size = Static::sizemap()->ByteSizeForClass(cl);
-    }
-
-    // Add stats from per-thread heaps
-    uint64_t class_count[kNumClasses];
-    memset(class_count, 0, sizeof(class_count));
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      uint64_t thread_bytes = 0;
-      ThreadCache::GetThreadStats(&thread_bytes, class_count);
-    }
-
-    prev_class_size = 0;
-    for (int cl = 1; cl < kNumClasses; ++cl) {
-      MallocExtension::FreeListInfo i;
-      i.min_object_size = prev_class_size + 1;
-      i.max_object_size = Static::sizemap()->ByteSizeForClass(cl);
-      i.total_bytes_free =
-          class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
-      i.type = kThreadCacheType;
-      v->push_back(i);
-    }
-
-    // append page heap info
-    PageHeap::SmallSpanStats small;
-    PageHeap::LargeSpanStats large;
-    {
-      SpinLockHolder h(Static::pageheap_lock());
-      Static::pageheap()->GetSmallSpanStats(&small);
-      Static::pageheap()->GetLargeSpanStats(&large);
-    }
-
-    // large spans: mapped
-    MallocExtension::FreeListInfo span_info;
-    span_info.type = kLargeSpanType;
-    span_info.max_object_size = (numeric_limits<size_t>::max)();
-    span_info.min_object_size = kMaxPages << kPageShift;
-    span_info.total_bytes_free = large.normal_pages << kPageShift;
-    v->push_back(span_info);
-
-    // large spans: unmapped
-    span_info.type = kLargeUnmappedSpanType;
-    span_info.total_bytes_free = large.returned_pages << kPageShift;
-    v->push_back(span_info);
-
-    // small spans
-    for (int s = 1; s < kMaxPages; s++) {
-      MallocExtension::FreeListInfo i;
-      i.max_object_size = (s << kPageShift);
-      i.min_object_size = ((s - 1) << kPageShift);
-
-      i.type = kPageHeapType;
-      i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
-      v->push_back(i);
-
-      i.type = kPageHeapUnmappedType;
-      i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
-      v->push_back(i);
-    }
-  }
-};
-
-// The constructor allocates an object to ensure that initialization
-// runs before main(), and therefore we do not have a chance to become
-// multi-threaded before initialization.  We also create the TSD key
-// here.  Presumably by the time this constructor runs, glibc is in
-// good enough shape to handle pthread_key_create().
-//
-// The constructor also takes the opportunity to tell STL to use
-// tcmalloc.  We want to do this early, before construct time, so
-// all user STL allocations go through tcmalloc (which works really
-// well for STL).
-//
-// The destructor prints stats when the program exits.
-static int tcmallocguard_refcount = 0;  // no lock needed: runs before main()
-TCMallocGuard::TCMallocGuard() {
-  if (tcmallocguard_refcount++ == 0) {
-    ReplaceSystemAlloc();    // defined in libc_override_*.h
-    tc_free(tc_malloc(1));
-    ThreadCache::InitTSD();
-    tc_free(tc_malloc(1));
-    // Either we, or debugallocation.cc, or valgrind will control memory
-    // management.  We register our extension if we're the winner.
-#ifdef TCMALLOC_USING_DEBUGALLOCATION
-    // Let debugallocation register its extension.
-#else
-    if (RunningOnValgrind()) {
-      // Let Valgrind uses its own malloc (so don't register our extension).
-    } else {
-      MallocExtension::Register(new TCMallocImplementation);
-    }
-#endif
-  }
-}
-
-TCMallocGuard::~TCMallocGuard() {
-  if (--tcmallocguard_refcount == 0) {
-    const char* env = NULL;
-    if (!RunningOnValgrind()) {
-      // Valgrind uses it's own malloc so we cannot do MALLOCSTATS
-      env = getenv("MALLOCSTATS");
-    }
-    if (env != NULL) {
-      int level = atoi(env);
-      if (level < 1) level = 1;
-      PrintStats(level);
-    }
-  }
-}
-#ifndef WIN32_OVERRIDE_ALLOCATORS
-static TCMallocGuard module_enter_exit_hook;
-#endif
-
-//-------------------------------------------------------------------
-// Helpers for the exported routines below
-//-------------------------------------------------------------------
-
-static inline bool CheckCachedSizeClass(void *ptr) {
-  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
-  return cached_value == 0 ||
-      cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
-}
-
-static inline void* CheckedMallocResult(void *result) {
-  ASSERT(result == NULL || CheckCachedSizeClass(result));
-  return result;
-}
-
-static inline void* SpanToMallocResult(Span *span) {
-  Static::pageheap()->CacheSizeClass(span->start, 0);
-  return
-      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
-}
-
-static void* DoSampledAllocation(size_t size) {
-#ifndef NO_TCMALLOC_SAMPLES
-  // Grab the stack trace outside the heap lock
-  StackTrace tmp;
-  tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
-  tmp.size = size;
-
-  SpinLockHolder h(Static::pageheap_lock());
-  // Allocate span
-  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
-  if (UNLIKELY(span == NULL)) {
-    return NULL;
-  }
-
-  // Allocate stack trace
-  StackTrace *stack = Static::stacktrace_allocator()->New();
-  if (UNLIKELY(stack == NULL)) {
-    // Sampling failed because of lack of memory
-    return span;
-  }
-  *stack = tmp;
-  span->sample = 1;
-  span->objects = stack;
-  tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
-
-  return SpanToMallocResult(span);
-#else
-  abort();
-#endif
-}
-
-namespace {
-
-typedef void* (*malloc_fn)(void *arg);
-
-SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
-
-void* handle_oom(malloc_fn retry_fn,
-                 void* retry_arg,
-                 bool from_operator,
-                 bool nothrow) {
-  if (!from_operator && !tc_new_mode) {
-    // we're out of memory in C library function (malloc etc) and no
-    // "new mode" forced on us. Just return NULL
-    return NULL;
-  }
-  // we're OOM in operator new or "new mode" is set. We might have to
-  // call new_handle and maybe retry allocation.
-
-  for (;;) {
-    // Get the current new handler.  NB: this function is not
-    // thread-safe.  We make a feeble stab at making it so here, but
-    // this lock only protects against tcmalloc interfering with
-    // itself, not with other libraries calling set_new_handler.
-    std::new_handler nh;
-    {
-      SpinLockHolder h(&set_new_handler_lock);
-      nh = std::set_new_handler(0);
-      (void) std::set_new_handler(nh);
-    }
-#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
-    if (!nh) {
-      return NULL;
-    }
-    // Since exceptions are disabled, we don't really know if new_handler
-    // failed.  Assume it will abort if it fails.
-    (*nh)();
-#else
-    // If no new_handler is established, the allocation failed.
-    if (!nh) {
-      if (nothrow) {
-        return NULL;
-      }
-      throw std::bad_alloc();
-    }
-    // Otherwise, try the new_handler.  If it returns, retry the
-    // allocation.  If it throws std::bad_alloc, fail the allocation.
-    // if it throws something else, don't interfere.
-    try {
-      (*nh)();
-    } catch (const std::bad_alloc&) {
-      if (!nothrow) throw;
-      return NULL;
-    }
-#endif  // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
-
-    // we get here if new_handler returns successfully. So we retry
-    // allocation.
-    void* rv = retry_fn(retry_arg);
-    if (rv != NULL) {
-      return rv;
-    }
-
-    // if allocation failed again we go to next loop iteration
-  }
-}
-
-// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
-// automatic increases factored in.
-static int64_t large_alloc_threshold =
-  (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold
-   ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold);
-
-static void ReportLargeAlloc(Length num_pages, void* result) {
-  StackTrace stack;
-  stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1);
-
-  static const int N = 1000;
-  char buffer[N];
-  TCMalloc_Printer printer(buffer, N);
-  printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ",
-                 static_cast<uint64>(num_pages) << kPageShift,
-                 result);
-  for (int i = 0; i < stack.depth; i++) {
-    printer.printf(" %p", stack.stack[i]);
-  }
-  printer.printf("\n");
-  write(STDERR_FILENO, buffer, strlen(buffer));
-}
-
-void* do_memalign(size_t align, size_t size);
-
-struct retry_memaligh_data {
-  size_t align;
-  size_t size;
-};
-
-static void *retry_do_memalign(void *arg) {
-  retry_memaligh_data *data = static_cast<retry_memaligh_data *>(arg);
-  return do_memalign(data->align, data->size);
-}
-
-static void *maybe_do_cpp_memalign_slow(size_t align, size_t size) {
-  retry_memaligh_data data;
-  data.align = align;
-  data.size = size;
-  return handle_oom(retry_do_memalign, &data,
-                    false, true);
-}
-
-inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
-  void *rv = do_memalign(align, size);
-  if (LIKELY(rv != NULL)) {
-    return rv;
-  }
-  return maybe_do_cpp_memalign_slow(align, size);
-}
-
-// Must be called with the page lock held.
-inline bool should_report_large(Length num_pages) {
-  const int64 threshold = large_alloc_threshold;
-  if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
-    // Increase the threshold by 1/8 every time we generate a report.
-    // We cap the threshold at 8GiB to avoid overflow problems.
-    large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
-                             ? threshold + threshold/8 : 8ll<<30);
-    return true;
-  }
-  return false;
-}
-
-// Helper for do_malloc().
-inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
-  void* result;
-  bool report_large;
-
-  Length num_pages = tcmalloc::pages(size);
-
-  // NOTE: we're passing original size here as opposed to rounded-up
-  // size as we do in do_malloc_small. The difference is small here
-  // (at most 4k out of at least 256k). And not rounding up saves us
-  // from possibility of overflow, which rounding up could produce.
-  //
-  // See https://github.com/gperftools/gperftools/issues/723
-  if (heap->SampleAllocation(size)) {
-    result = DoSampledAllocation(size);
-
-    SpinLockHolder h(Static::pageheap_lock());
-    report_large = should_report_large(num_pages);
-  } else {
-    SpinLockHolder h(Static::pageheap_lock());
-    Span* span = Static::pageheap()->New(num_pages);
-    result = (UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span));
-    report_large = should_report_large(num_pages);
-  }
-
-  if (report_large) {
-    ReportLargeAlloc(num_pages, result);
-  }
-  return result;
-}
-
-ALWAYS_INLINE void* do_malloc_small(ThreadCache* heap, size_t size) {
-  ASSERT(Static::IsInited());
-  ASSERT(heap != NULL);
-  size_t cl = Static::sizemap()->SizeClass(size);
-  size = Static::sizemap()->class_to_size(cl);
-
-  if (UNLIKELY(heap->SampleAllocation(size))) {
-    return DoSampledAllocation(size);
-  } else {
-    // The common case, and also the simplest.  This just pops the
-    // size-appropriate freelist, after replenishing it if it's empty.
-    return CheckedMallocResult(heap->Allocate(size, cl));
-  }
-}
-
-ALWAYS_INLINE void* do_malloc(size_t size) {
-  if (ThreadCache::have_tls) {
-    if (LIKELY(size < ThreadCache::MinSizeForSlowPath())) {
-      return do_malloc_small(ThreadCache::GetCacheWhichMustBePresent(), size);
-    }
-    if (UNLIKELY(ThreadCache::IsUseEmergencyMalloc())) {
-      return tcmalloc::EmergencyMalloc(size);
-    }
-  }
-
-  if (size <= kMaxSize) {
-    return do_malloc_small(ThreadCache::GetCache(), size);
-  } else {
-    return do_malloc_pages(ThreadCache::GetCache(), size);
-  }
-}
-
-static void *retry_malloc(void* size) {
-  return do_malloc(reinterpret_cast<size_t>(size));
-}
-
-ALWAYS_INLINE void* do_malloc_or_cpp_alloc(size_t size) {
-  void *rv = do_malloc(size);
-  if (LIKELY(rv != NULL)) {
-    return rv;
-  }
-  return handle_oom(retry_malloc, reinterpret_cast<void *>(size),
-                    false, true);
-}
-
-ALWAYS_INLINE void* do_calloc(size_t n, size_t elem_size) {
-  // Overflow check
-  const size_t size = n * elem_size;
-  if (elem_size != 0 && size / elem_size != n) return NULL;
-
-  void* result = do_malloc_or_cpp_alloc(size);
-  if (result != NULL) {
-    memset(result, 0, size);
-  }
-  return result;
-}
-
-// If ptr is NULL, do nothing.  Otherwise invoke the given function.
-inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) {
-  if (ptr != NULL) {
-    (*invalid_free_fn)(ptr);
-  }
-}
-
-// Helper for do_free_with_callback(), below.  Inputs:
-//   ptr is object to be freed
-//   invalid_free_fn is a function that gets invoked on certain "bad frees"
-//   heap is the ThreadCache for this thread, or NULL if it isn't known
-//   heap_must_be_valid is whether heap is known to be non-NULL
-//
-// This function may only be used after Static::IsInited() is true.
-//
-// We can usually detect the case where ptr is not pointing to a page that
-// tcmalloc is using, and in those cases we invoke invalid_free_fn.
-//
-// To maximize speed in the common case, we usually get here with
-// heap_must_be_valid being a manifest constant equal to true.
-ALWAYS_INLINE void do_free_helper(void* ptr,
-                                  void (*invalid_free_fn)(void*),
-                                  ThreadCache* heap,
-                                  bool heap_must_be_valid,
-                                  bool use_hint,
-                                  size_t size_hint) {
-  ASSERT((Static::IsInited() && heap != NULL) || !heap_must_be_valid);
-  if (!heap_must_be_valid && !Static::IsInited()) {
-    // We called free() before malloc().  This can occur if the
-    // (system) malloc() is called before tcmalloc is loaded, and then
-    // free() is called after tcmalloc is loaded (and tc_free has
-    // replaced free), but before the global constructor has run that
-    // sets up the tcmalloc data structures.
-    free_null_or_invalid(ptr, invalid_free_fn);
-    return;
-  }
-  Span* span = NULL;
-  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cl;
-  if (use_hint && Static::sizemap()->MaybeSizeClass(size_hint, &cl)) {
-    goto non_zero;
-  }
-
-  cl = Static::pageheap()->GetSizeClassIfCached(p);
-  if (UNLIKELY(cl == 0)) {
-    span = Static::pageheap()->GetDescriptor(p);
-    if (UNLIKELY(!span)) {
-      // span can be NULL because the pointer passed in is NULL or invalid
-      // (not something returned by malloc or friends), or because the
-      // pointer was allocated with some other allocator besides
-      // tcmalloc.  The latter can happen if tcmalloc is linked in via
-      // a dynamic library, but is not listed last on the link line.
-      // In that case, libraries after it on the link line will
-      // allocate with libc malloc, but free with tcmalloc's free.
-      free_null_or_invalid(ptr, invalid_free_fn);
-      return;
-    }
-    cl = span->sizeclass;
-    Static::pageheap()->CacheSizeClass(p, cl);
-  }
-
-  ASSERT(ptr != NULL);
-  if (LIKELY(cl != 0)) {
-  non_zero:
-    ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
-    if (heap_must_be_valid || heap != NULL) {
-      heap->Deallocate(ptr, cl);
-    } else {
-      // Delete directly into central cache
-      tcmalloc::SLL_SetNext(ptr, NULL);
-      Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
-    }
-  } else {
-    SpinLockHolder h(Static::pageheap_lock());
-    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
-    ASSERT(span != NULL && span->start == p);
-    if (span->sample) {
-      StackTrace* st = reinterpret_cast<StackTrace*>(span->objects);
-      tcmalloc::DLL_Remove(span);
-      Static::stacktrace_allocator()->Delete(st);
-      span->objects = NULL;
-    }
-    Static::pageheap()->Delete(span);
-  }
-}
-
-// Helper for the object deletion (free, delete, etc.).  Inputs:
-//   ptr is object to be freed
-//   invalid_free_fn is a function that gets invoked on certain "bad frees"
-//
-// We can usually detect the case where ptr is not pointing to a page that
-// tcmalloc is using, and in those cases we invoke invalid_free_fn.
-ALWAYS_INLINE void do_free_with_callback(void* ptr,
-                                         void (*invalid_free_fn)(void*),
-                                         bool use_hint, size_t size_hint) {
-  ThreadCache* heap = NULL;
-  heap = ThreadCache::GetCacheIfPresent();
-  if (LIKELY(heap)) {
-    do_free_helper(ptr, invalid_free_fn, heap, true, use_hint, size_hint);
-  } else {
-    do_free_helper(ptr, invalid_free_fn, heap, false, use_hint, size_hint);
-  }
-}
-
-// The default "do_free" that uses the default callback.
-ALWAYS_INLINE void do_free(void* ptr) {
-  return do_free_with_callback(ptr, &InvalidFree, false, 0);
-}
-
-// NOTE: some logic here is duplicated in GetOwnership (above), for
-// speed.  If you change this function, look at that one too.
-inline size_t GetSizeWithCallback(const void* ptr,
-                                  size_t (*invalid_getsize_fn)(const void*)) {
-  if (ptr == NULL)
-    return 0;
-  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
-  if (cl != 0) {
-    return Static::sizemap()->ByteSizeForClass(cl);
-  } else {
-    const Span *span = Static::pageheap()->GetDescriptor(p);
-    if (UNLIKELY(span == NULL)) {  // means we do not own this memory
-      return (*invalid_getsize_fn)(ptr);
-    } else if (span->sizeclass != 0) {
-      Static::pageheap()->CacheSizeClass(p, span->sizeclass);
-      return Static::sizemap()->ByteSizeForClass(span->sizeclass);
-    } else {
-      return span->length << kPageShift;
-    }
-  }
-}
-
-// This lets you call back to a given function pointer if ptr is invalid.
-// It is used primarily by windows code which wants a specialized callback.
-ALWAYS_INLINE void* do_realloc_with_callback(
-    void* old_ptr, size_t new_size,
-    void (*invalid_free_fn)(void*),
-    size_t (*invalid_get_size_fn)(const void*)) {
-  // Get the size of the old entry
-  const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
-
-  // Reallocate if the new size is larger than the old size,
-  // or if the new size is significantly smaller than the old size.
-  // We do hysteresis to avoid resizing ping-pongs:
-  //    . If we need to grow, grow to max(new_size, old_size * 1.X)
-  //    . Don't shrink unless new_size < old_size * 0.Y
-  // X and Y trade-off time for wasted space.  For now we do 1.25 and 0.5.
-  const size_t lower_bound_to_grow = old_size + old_size / 4ul;
-  const size_t upper_bound_to_shrink = old_size / 2ul;
-  if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) {
-    // Need to reallocate.
-    void* new_ptr = NULL;
-
-    if (new_size > old_size && new_size < lower_bound_to_grow) {
-      new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
-    }
-    if (new_ptr == NULL) {
-      // Either new_size is not a tiny increment, or last do_malloc failed.
-      new_ptr = do_malloc_or_cpp_alloc(new_size);
-    }
-    if (UNLIKELY(new_ptr == NULL)) {
-      return NULL;
-    }
-    MallocHook::InvokeNewHook(new_ptr, new_size);
-    memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
-    MallocHook::InvokeDeleteHook(old_ptr);
-    // We could use a variant of do_free() that leverages the fact
-    // that we already know the sizeclass of old_ptr.  The benefit
-    // would be small, so don't bother.
-    do_free_with_callback(old_ptr, invalid_free_fn, false, 0);
-    return new_ptr;
-  } else {
-    // We still need to call hooks to report the updated size:
-    MallocHook::InvokeDeleteHook(old_ptr);
-    MallocHook::InvokeNewHook(old_ptr, new_size);
-    return old_ptr;
-  }
-}
-
-ALWAYS_INLINE void* do_realloc(void* old_ptr, size_t new_size) {
-  return do_realloc_with_callback(old_ptr, new_size,
-                                  &InvalidFree, &InvalidGetSizeForRealloc);
-}
-
-// For use by exported routines below that want specific alignments
-//
-// Note: this code can be slow for alignments > 16, and can
-// significantly fragment memory.  The expectation is that
-// memalign/posix_memalign/valloc/pvalloc will not be invoked very
-// often.  This requirement simplifies our implementation and allows
-// us to tune for expected allocation patterns.
-void* do_memalign(size_t align, size_t size) {
-  ASSERT((align & (align - 1)) == 0);
-  ASSERT(align > 0);
-  if (size + align < size) return NULL;         // Overflow
-
-  // Fall back to malloc if we would already align this memory access properly.
-  if (align <= AlignmentForSize(size)) {
-    void* p = do_malloc(size);
-    ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0);
-    return p;
-  }
-
-  if (UNLIKELY(Static::pageheap() == NULL)) ThreadCache::InitModule();
-
-  // Allocate at least one byte to avoid boundary conditions below
-  if (size == 0) size = 1;
-
-  if (size <= kMaxSize && align < kPageSize) {
-    // Search through acceptable size classes looking for one with
-    // enough alignment.  This depends on the fact that
-    // InitSizeClasses() currently produces several size classes that
-    // are aligned at powers of two.  We will waste time and space if
-    // we miss in the size class array, but that is deemed acceptable
-    // since memalign() should be used rarely.
-    int cl = Static::sizemap()->SizeClass(size);
-    while (cl < kNumClasses &&
-           ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) {
-      cl++;
-    }
-    if (cl < kNumClasses) {
-      ThreadCache* heap = ThreadCache::GetCache();
-      size = Static::sizemap()->class_to_size(cl);
-      return CheckedMallocResult(heap->Allocate(size, cl));
-    }
-  }
-
-  // We will allocate directly from the page heap
-  SpinLockHolder h(Static::pageheap_lock());
-
-  if (align <= kPageSize) {
-    // Any page-level allocation will be fine
-    // TODO: We could put the rest of this page in the appropriate
-    // TODO: cache but it does not seem worth it.
-    Span* span = Static::pageheap()->New(tcmalloc::pages(size));
-    return UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span);
-  }
-
-  // Allocate extra pages and carve off an aligned portion
-  const Length alloc = tcmalloc::pages(size + align);
-  Span* span = Static::pageheap()->New(alloc);
-  if (UNLIKELY(span == NULL)) return NULL;
-
-  // Skip starting portion so that we end up aligned
-  Length skip = 0;
-  while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
-    skip++;
-  }
-  ASSERT(skip < alloc);
-  if (skip > 0) {
-    Span* rest = Static::pageheap()->Split(span, skip);
-    Static::pageheap()->Delete(span);
-    span = rest;
-  }
-
-  // Skip trailing portion that we do not need to return
-  const Length needed = tcmalloc::pages(size);
-  ASSERT(span->length >= needed);
-  if (span->length > needed) {
-    Span* trailer = Static::pageheap()->Split(span, needed);
-    Static::pageheap()->Delete(trailer);
-  }
-  return SpanToMallocResult(span);
-}
-
-// Helpers for use by exported routines below:
-
-inline void do_malloc_stats() {
-  PrintStats(1);
-}
-
-inline int do_mallopt(int cmd, int value) {
-  return 1;     // Indicates error
-}
-
-#ifdef HAVE_STRUCT_MALLINFO
-inline struct mallinfo do_mallinfo() {
-  TCMallocStats stats;
-  ExtractStats(&stats, NULL, NULL, NULL);
-
-  // Just some of the fields are filled in.
-  struct mallinfo info;
-  memset(&info, 0, sizeof(info));
-
-  // Unfortunately, the struct contains "int" field, so some of the
-  // size values will be truncated.
-  info.arena     = static_cast<int>(stats.pageheap.system_bytes);
-  info.fsmblks   = static_cast<int>(stats.thread_bytes
-                                    + stats.central_bytes
-                                    + stats.transfer_bytes);
-  info.fordblks  = static_cast<int>(stats.pageheap.free_bytes +
-                                    stats.pageheap.unmapped_bytes);
-  info.uordblks  = static_cast<int>(stats.pageheap.system_bytes
-                                    - stats.thread_bytes
-                                    - stats.central_bytes
-                                    - stats.transfer_bytes
-                                    - stats.pageheap.free_bytes
-                                    - stats.pageheap.unmapped_bytes);
-
-  return info;
-}
-#endif  // HAVE_STRUCT_MALLINFO
-
-inline void* cpp_alloc(size_t size, bool nothrow) {
-  void* p = do_malloc(size);
-  if (LIKELY(p)) {
-    return p;
-  }
-  return handle_oom(retry_malloc, reinterpret_cast<void *>(size),
-                    true, nothrow);
-}
-
-}  // end unnamed namespace
-
-// As promised, the definition of this function, declared above.
-size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
-  if (ptr == NULL)
-    return 0;
-  ASSERT(TCMallocImplementation::GetOwnership(ptr)
-         != TCMallocImplementation::kNotOwned);
-  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
-}
-
-void TCMallocImplementation::MarkThreadBusy() {
-  // Allocate to force the creation of a thread cache, but avoid
-  // invoking any hooks.
-  do_free(do_malloc(0));
-}
-
-//-------------------------------------------------------------------
-// Exported routines
-//-------------------------------------------------------------------
-
-extern "C" PERFTOOLS_DLL_DECL const char* tc_version(
-    int* major, int* minor, const char** patch) PERFTOOLS_THROW {
-  if (major) *major = TC_VERSION_MAJOR;
-  if (minor) *minor = TC_VERSION_MINOR;
-  if (patch) *patch = TC_VERSION_PATCH;
-  return TC_VERSION_STRING;
-}
-
-// This function behaves similarly to MSVC's _set_new_mode.
-// If flag is 0 (default), calls to malloc will behave normally.
-// If flag is 1, calls to malloc will behave like calls to new,
-// and the std_new_handler will be invoked on failure.
-// Returns the previous mode.
-extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW {
-  int old_mode = tc_new_mode;
-  tc_new_mode = flag;
-  return old_mode;
-}
-
-#ifndef TCMALLOC_USING_DEBUGALLOCATION  // debugallocation.cc defines its own
-
-#if defined(__GNUC__) && defined(__ELF__) && !defined(TCMALLOC_NO_ALIASES)
-#define TC_ALIAS(name) __attribute__((alias(#name)))
-#endif
-
-// CAVEAT: The code structure below ensures that MallocHook methods are always
-//         called from the stack frame of the invoked allocation function.
-//         heap-checker.cc depends on this to start a stack trace from
-//         the call to the (de)allocation function.
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW {
-  void* result = do_malloc_or_cpp_alloc(size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW {
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free(ptr);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW {
-  if ((reinterpret_cast<uintptr_t>(ptr) & (kPageSize-1)) == 0) {
-    tc_free(ptr);
-    return;
-  }
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free_with_callback(ptr, &InvalidFree, true, size);
-}
-
-#ifdef TC_ALIAS
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw()
-  TC_ALIAS(tc_free_sized);
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw()
-  TC_ALIAS(tc_free_sized);
-
-#else
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw() {
-  tc_free_sized(p, size);
-}
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw() {
-  tc_free_sized(p, size);
-}
-
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n,
-                                              size_t elem_size) PERFTOOLS_THROW {
-  if (ThreadCache::IsUseEmergencyMalloc()) {
-    return tcmalloc::EmergencyCalloc(n, elem_size);
-  }
-  void* result = do_calloc(n, elem_size);
-  MallocHook::InvokeNewHook(result, n * elem_size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(ptr);
-  do_free(ptr);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr,
-                                               size_t new_size) PERFTOOLS_THROW {
-  if (old_ptr == NULL) {
-    void* result = do_malloc_or_cpp_alloc(new_size);
-    MallocHook::InvokeNewHook(result, new_size);
-    return result;
-  }
-  if (new_size == 0) {
-    MallocHook::InvokeDeleteHook(old_ptr);
-    do_free(old_ptr);
-    return NULL;
-  }
-  if (UNLIKELY(tcmalloc::IsEmergencyPtr(old_ptr))) {
-    return tcmalloc::EmergencyRealloc(old_ptr, new_size);
-  }
-  return do_realloc(old_ptr, new_size);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
-  void* p = cpp_alloc(size, false);
-  // We keep this next instruction out of cpp_alloc for a reason: when
-  // it's in, and new just calls cpp_alloc, the optimizer may fold the
-  // new call into cpp_alloc, which messes up our whole section-based
-  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
-  // isn't the last thing this fn calls, and prevents the folding.
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW {
-  void* p = cpp_alloc(size, true);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-// Standard C++ library implementations define and use this
-// (via ::operator delete(ptr, nothrow)).
-// But it's really the same as normal delete, so we just do the same thing.
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size)
-#ifdef TC_ALIAS
-TC_ALIAS(tc_new);
-#else
-{
-  void* p = cpp_alloc(size, false);
-  // We keep this next instruction out of cpp_alloc for a reason: when
-  // it's in, and new just calls cpp_alloc, the optimizer may fold the
-  // new call into cpp_alloc, which messes up our whole section-based
-  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
-  // isn't the last thing this fn calls, and prevents the folding.
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_new_nothrow);
-#else
-{
-  void* p = cpp_alloc(size, true);
-  MallocHook::InvokeNewHook(p, size);
-  return p;
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW
-#ifdef TC_ALIAS
-TC_ALIAS(tc_free);
-#else
-{
-  MallocHook::InvokeDeleteHook(p);
-  do_free(p);
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align,
-                                                size_t size) PERFTOOLS_THROW {
-  void* result = do_memalign_or_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(
-    void** result_ptr, size_t align, size_t size) PERFTOOLS_THROW {
-  if (((align % sizeof(void*)) != 0) ||
-      ((align & (align - 1)) != 0) ||
-      (align == 0)) {
-    return EINVAL;
-  }
-
-  void* result = do_memalign_or_cpp_memalign(align, size);
-  MallocHook::InvokeNewHook(result, size);
-  if (UNLIKELY(result == NULL)) {
-    return ENOMEM;
-  } else {
-    *result_ptr = result;
-    return 0;
-  }
-}
-
-static size_t pagesize = 0;
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW {
-  // Allocate page-aligned object of length >= size bytes
-  if (pagesize == 0) pagesize = getpagesize();
-  void* result = do_memalign_or_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW {
-  // Round up size to a multiple of pagesize
-  if (pagesize == 0) pagesize = getpagesize();
-  if (size == 0) {     // pvalloc(0) should allocate one page, according to
-    size = pagesize;   // http://man.free4web.biz/man3/libmpatrol.3.html
-  }
-  size = (size + pagesize - 1) & ~(pagesize - 1);
-  void* result = do_memalign_or_cpp_memalign(pagesize, size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW {
-  do_malloc_stats();
-}
-
-extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW {
-  return do_mallopt(cmd, value);
-}
-
-#ifdef HAVE_STRUCT_MALLINFO
-extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW {
-  return do_mallinfo();
-}
-#endif
-
-extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW {
-  return MallocExtension::instance()->GetAllocatedSize(ptr);
-}
-
-extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size)  PERFTOOLS_THROW {
-  void* result = do_malloc(size);
-  MallocHook::InvokeNewHook(result, size);
-  return result;
-}
-
-#pragma GCC diagnostic pop
-
-#endif  // TCMALLOC_USING_DEBUGALLOCATION
diff --git a/contrib/libtcmalloc/src/tcmalloc.h b/contrib/libtcmalloc/src/tcmalloc.h
deleted file mode 100644
index 70d567268c2..00000000000
--- a/contrib/libtcmalloc/src/tcmalloc.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein <opensource@google.com>
-//
-// Some obscure memory-allocation routines may not be declared on all
-// systems.  In those cases, we'll just declare them ourselves.
-// This file is meant to be used only internally, for unittests.
-
-#include "config.h"
-
-#ifndef _XOPEN_SOURCE
-# define _XOPEN_SOURCE 600  // for posix_memalign
-#endif
-#include <stdlib.h>         // for posix_memalign
-// FreeBSD has malloc.h, but complains if you use it
-#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__)
-#include <malloc.h>         // for memalign, valloc, pvalloc
-#endif
-
-// __THROW is defined in glibc systems.  It means, counter-intuitively,
-// "This function will never throw an exception."  It's an optional
-// optimization tool, but we may need to use it to match glibc prototypes.
-#ifndef __THROW    // I guess we're not on a glibc system
-# define __THROW   // __THROW is just an optimization, so ok to make it ""
-#endif
-
-#if !HAVE_CFREE_SYMBOL
-extern "C" void cfree(void* ptr) __THROW;
-#endif
-#if !HAVE_POSIX_MEMALIGN_SYMBOL
-extern "C" int posix_memalign(void** ptr, size_t align, size_t size) __THROW;
-#endif
-#if !HAVE_MEMALIGN_SYMBOL
-extern "C" void* memalign(size_t __alignment, size_t __size) __THROW;
-#endif
-#if !HAVE_VALLOC_SYMBOL
-extern "C" void* valloc(size_t __size) __THROW;
-#endif
-#if !HAVE_PVALLOC_SYMBOL
-extern "C" void* pvalloc(size_t __size) __THROW;
-#endif
diff --git a/contrib/libtcmalloc/src/tcmalloc_guard.h b/contrib/libtcmalloc/src/tcmalloc_guard.h
deleted file mode 100644
index 84952bac2ea..00000000000
--- a/contrib/libtcmalloc/src/tcmalloc_guard.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Craig Silverstein
-//
-// We expose the TCMallocGuard class -- which initializes the tcmalloc
-// allocator -- so classes that need to be sure tcmalloc is loaded
-// before they do stuff -- notably heap-profiler -- can.  To use this
-// create a static TCMallocGuard instance at the top of a file where
-// you need tcmalloc to be initialized before global constructors run.
-
-#ifndef TCMALLOC_TCMALLOC_GUARD_H_
-#define TCMALLOC_TCMALLOC_GUARD_H_
-
-class TCMallocGuard {
- public:
-  TCMallocGuard();
-  ~TCMallocGuard();
-};
-
-#endif  // TCMALLOC_TCMALLOC_GUARD_H_
diff --git a/contrib/libtcmalloc/src/third_party/valgrind.h b/contrib/libtcmalloc/src/third_party/valgrind.h
deleted file mode 100644
index 577c59ab0cd..00000000000
--- a/contrib/libtcmalloc/src/third_party/valgrind.h
+++ /dev/null
@@ -1,3924 +0,0 @@
-/* -*- c -*-
-   ----------------------------------------------------------------
-
-   Notice that the following BSD-style license applies to this one
-   file (valgrind.h) only.  The rest of Valgrind is licensed under the
-   terms of the GNU General Public License, version 2, unless
-   otherwise indicated.  See the COPYING file in the source
-   distribution for details.
-
-   ----------------------------------------------------------------
-
-   This file is part of Valgrind, a dynamic binary instrumentation
-   framework.
-
-   Copyright (C) 2000-2008 Julian Seward.  All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   1. Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-
-   2. The origin of this software must not be misrepresented; you must 
-      not claim that you wrote the original software.  If you use this 
-      software in a product, an acknowledgment in the product 
-      documentation would be appreciated but is not required.
-
-   3. Altered source versions must be plainly marked as such, and must
-      not be misrepresented as being the original software.
-
-   4. The name of the author may not be used to endorse or promote 
-      products derived from this software without specific prior written 
-      permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
-   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   ----------------------------------------------------------------
-
-   Notice that the above BSD-style license applies to this one file
-   (valgrind.h) only.  The entire rest of Valgrind is licensed under
-   the terms of the GNU General Public License, version 2.  See the
-   COPYING file in the source distribution for details.
-
-   ---------------------------------------------------------------- 
-*/
-
-
-/* This file is for inclusion into client (your!) code.
-
-   You can use these macros to manipulate and query Valgrind's 
-   execution inside your own programs.
-
-   The resulting executables will still run without Valgrind, just a
-   little bit more slowly than they otherwise would, but otherwise
-   unchanged.  When not running on valgrind, each client request
-   consumes very few (eg. 7) instructions, so the resulting performance
-   loss is negligible unless you plan to execute client requests
-   millions of times per second.  Nevertheless, if that is still a
-   problem, you can compile with the NVALGRIND symbol defined (gcc
-   -DNVALGRIND) so that client requests are not even compiled in.  */
-
-#ifndef __VALGRIND_H
-#define __VALGRIND_H
-
-#include <stdarg.h>
-
-/* Nb: this file might be included in a file compiled with -ansi.  So
-   we can't use C++ style "//" comments nor the "asm" keyword (instead
-   use "__asm__"). */
-
-/* Derive some tags indicating what the target platform is.  Note
-   that in this file we're using the compiler's CPP symbols for
-   identifying architectures, which are different to the ones we use
-   within the rest of Valgrind.  Note, __powerpc__ is active for both
-   32 and 64-bit PPC, whereas __powerpc64__ is only active for the
-   latter (on Linux, that is). */
-#undef PLAT_x86_linux
-#undef PLAT_amd64_linux
-#undef PLAT_ppc32_linux
-#undef PLAT_ppc64_linux
-#undef PLAT_ppc32_aix5
-#undef PLAT_ppc64_aix5
-
-#if !defined(_AIX) && defined(__i386__)
-#  define PLAT_x86_linux 1
-#elif !defined(_AIX) && defined(__x86_64__)
-#  define PLAT_amd64_linux 1
-#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__)
-#  define PLAT_ppc32_linux 1
-#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__)
-#  define PLAT_ppc64_linux 1
-#elif defined(_AIX) && defined(__64BIT__)
-#  define PLAT_ppc64_aix5 1
-#elif defined(_AIX) && !defined(__64BIT__)
-#  define PLAT_ppc32_aix5 1
-#endif
-
-
-/* If we're not compiling for our target platform, don't generate
-   any inline asms.  */
-#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \
-    && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \
-    && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5)
-#  if !defined(NVALGRIND)
-#    define NVALGRIND 1
-#  endif
-#endif
-
-
-/* ------------------------------------------------------------------ */
-/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS.  There is nothing */
-/* in here of use to end-users -- skip to the next section.           */
-/* ------------------------------------------------------------------ */
-
-#if defined(NVALGRIND)
-
-/* Define NVALGRIND to completely remove the Valgrind magic sequence
-   from the compiled code (analogous to NDEBUG's effects on
-   assert()) */
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-   {                                                              \
-      (_zzq_rlval) = (_zzq_default);                              \
-   }
-
-#else  /* ! NVALGRIND */
-
-/* The following defines the magic code sequences which the JITter
-   spots and handles magically.  Don't look too closely at them as
-   they will rot your brain.
-
-   The assembly code sequences for all architectures is in this one
-   file.  This is because this file must be stand-alone, and we don't
-   want to have multiple files.
-
-   For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
-   value gets put in the return slot, so that everything works when
-   this is executed not under Valgrind.  Args are passed in a memory
-   block, and so there's no intrinsic limit to the number that could
-   be passed, but it's currently five.
-   
-   The macro args are: 
-      _zzq_rlval    result lvalue
-      _zzq_default  default value (result returned when running on real CPU)
-      _zzq_request  request code
-      _zzq_arg1..5  request params
-
-   The other two macros are used to support function wrapping, and are
-   a lot simpler.  VALGRIND_GET_NR_CONTEXT returns the value of the
-   guest's NRADDR pseudo-register and whatever other information is
-   needed to safely run the call original from the wrapper: on
-   ppc64-linux, the R2 value at the divert point is also needed.  This
-   information is abstracted into a user-visible type, OrigFn.
-
-   VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
-   guest, but guarantees that the branch instruction will not be
-   redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
-   branch-and-link-to-r11.  VALGRIND_CALL_NOREDIR is just text, not a
-   complete inline asm, since it needs to be combined with more magic
-   inline asm stuff to be useful.
-*/
-
-/* ------------------------- x86-linux ------------------------- */
-
-#if defined(PLAT_x86_linux)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "roll $3,  %%edi ; roll $13, %%edi\n\t"      \
-                     "roll $29, %%edi ; roll $19, %%edi\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-  { volatile unsigned int _zzq_args[6];                           \
-    volatile unsigned int _zzq_result;                            \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %EDX = client_request ( %EAX ) */         \
-                     "xchgl %%ebx,%%ebx"                          \
-                     : "=d" (_zzq_result)                         \
-                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    volatile unsigned int __addr;                                 \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %EAX = guest_NRADDR */                    \
-                     "xchgl %%ecx,%%ecx"                          \
-                     : "=a" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_CALL_NOREDIR_EAX                                 \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* call-noredir *%EAX */                     \
-                     "xchgl %%edx,%%edx\n\t"
-#endif /* PLAT_x86_linux */
-
-/* ------------------------ amd64-linux ------------------------ */
-
-#if defined(PLAT_amd64_linux)
-
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"      \
-                     "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-  { volatile unsigned long long int _zzq_args[6];                 \
-    volatile unsigned long long int _zzq_result;                  \
-    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
-    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %RDX = client_request ( %RAX ) */         \
-                     "xchgq %%rbx,%%rbx"                          \
-                     : "=d" (_zzq_result)                         \
-                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    volatile unsigned long long int __addr;                       \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %RAX = guest_NRADDR */                    \
-                     "xchgq %%rcx,%%rcx"                          \
-                     : "=a" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_CALL_NOREDIR_RAX                                 \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* call-noredir *%RAX */                     \
-                     "xchgq %%rdx,%%rdx\n\t"
-#endif /* PLAT_amd64_linux */
-
-/* ------------------------ ppc32-linux ------------------------ */
-
-#if defined(PLAT_ppc32_linux)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned int  _zzq_args[6];                          \
-             unsigned int  _zzq_result;                           \
-             unsigned int* _zzq_ptr;                              \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
-                     "mr 4,%2\n\t" /*ptr*/                        \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"     /*result*/                     \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
-                     : "cc", "memory", "r3", "r4");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    unsigned int __addr;                                          \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory", "r3"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-#endif /* PLAT_ppc32_linux */
-
-/* ------------------------ ppc64-linux ------------------------ */
-
-#if defined(PLAT_ppc64_linux)
-
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-      unsigned long long int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
-                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned long long int  _zzq_args[6];                \
-    register unsigned long long int  _zzq_result __asm__("r3");   \
-    register unsigned long long int* _zzq_ptr __asm__("r4");      \
-    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
-    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1"                                   \
-                     : "=r" (_zzq_result)                         \
-                     : "0" (_zzq_default), "r" (_zzq_ptr)         \
-                     : "cc", "memory");                           \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned long long int __addr __asm__("r3");         \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2"                                   \
-                     : "=r" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4"                                   \
-                     : "=r" (__addr)                              \
-                     :                                            \
-                     : "cc", "memory"                             \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc64_linux */
-
-/* ------------------------ ppc32-aix5 ------------------------- */
-
-#if defined(PLAT_ppc32_aix5)
-
-typedef
-   struct { 
-      unsigned int nraddr; /* where's the code? */
-      unsigned int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned int  _zzq_args[7];                          \
-    register unsigned int  _zzq_result;                           \
-    register unsigned int* _zzq_ptr;                              \
-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
-    _zzq_args[6] = (unsigned int)(_zzq_default);                  \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 4,%1\n\t"                                \
-                     "lwz 3, 24(4)\n\t"                           \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_ptr)                             \
-                     : "r3", "r4", "cc", "memory");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned int __addr;                                 \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc32_aix5 */
-
-/* ------------------------ ppc64-aix5 ------------------------- */
-
-#if defined(PLAT_ppc64_aix5)
-
-typedef
-   struct { 
-      unsigned long long int nraddr; /* where's the code? */
-      unsigned long long int r2;  /* what tocptr do we need? */
-   }
-   OrigFn;
-
-#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
-                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
-
-#define VALGRIND_DO_CLIENT_REQUEST(                               \
-        _zzq_rlval, _zzq_default, _zzq_request,                   \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
-                                                                  \
-  {          unsigned long long int  _zzq_args[7];                \
-    register unsigned long long int  _zzq_result;                 \
-    register unsigned long long int* _zzq_ptr;                    \
-    _zzq_args[0] = (unsigned int long long)(_zzq_request);        \
-    _zzq_args[1] = (unsigned int long long)(_zzq_arg1);           \
-    _zzq_args[2] = (unsigned int long long)(_zzq_arg2);           \
-    _zzq_args[3] = (unsigned int long long)(_zzq_arg3);           \
-    _zzq_args[4] = (unsigned int long long)(_zzq_arg4);           \
-    _zzq_args[5] = (unsigned int long long)(_zzq_arg5);           \
-    _zzq_args[6] = (unsigned int long long)(_zzq_default);        \
-    _zzq_ptr = _zzq_args;                                         \
-    __asm__ volatile("mr 4,%1\n\t"                                \
-                     "ld 3, 48(4)\n\t"                            \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = client_request ( %R4 ) */           \
-                     "or 1,1,1\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (_zzq_result)                         \
-                     : "b" (_zzq_ptr)                             \
-                     : "r3", "r4", "cc", "memory");               \
-    _zzq_rlval = _zzq_result;                                     \
-  }
-
-#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
-  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
-    register unsigned long long int __addr;                       \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR */                     \
-                     "or 2,2,2\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->nraddr = __addr;                                   \
-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* %R3 = guest_NRADDR_GPR2 */                \
-                     "or 4,4,4\n\t"                               \
-                     "mr %0,3"                                    \
-                     : "=b" (__addr)                              \
-                     :                                            \
-                     : "r3", "cc", "memory"                       \
-                    );                                            \
-    _zzq_orig->r2 = __addr;                                       \
-  }
-
-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
-                     __SPECIAL_INSTRUCTION_PREAMBLE               \
-                     /* branch-and-link-to-noredir *%R11 */       \
-                     "or 3,3,3\n\t"
-
-#endif /* PLAT_ppc64_aix5 */
-
-/* Insert assembly code for other platforms here... */
-
-#endif /* NVALGRIND */
-
-
-/* ------------------------------------------------------------------ */
-/* PLATFORM SPECIFICS for FUNCTION WRAPPING.  This is all very        */
-/* ugly.  It's the least-worst tradeoff I can think of.               */
-/* ------------------------------------------------------------------ */
-
-/* This section defines magic (a.k.a appalling-hack) macros for doing
-   guaranteed-no-redirection macros, so as to get from function
-   wrappers to the functions they are wrapping.  The whole point is to
-   construct standard call sequences, but to do the call itself with a
-   special no-redirect call pseudo-instruction that the JIT
-   understands and handles specially.  This section is long and
-   repetitious, and I can't see a way to make it shorter.
-
-   The naming scheme is as follows:
-
-      CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
-
-   'W' stands for "word" and 'v' for "void".  Hence there are
-   different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
-   and for each, the possibility of returning a word-typed result, or
-   no result.
-*/
-
-/* Use these to write the name of your wrapper.  NOTE: duplicates
-   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
-
-#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname)                    \
-   _vgwZU_##soname##_##fnname
-
-#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname)                    \
-   _vgwZZ_##soname##_##fnname
-
-/* Use this macro from within a wrapper function to collect the
-   context (address and possibly other info) of the original function.
-   Once you have that you can then use it in one of the CALL_FN_
-   macros.  The type of the argument _lval is OrigFn. */
-#define VALGRIND_GET_ORIG_FN(_lval)  VALGRIND_GET_NR_CONTEXT(_lval)
-
-/* Derivatives of the main macros below, for calling functions
-   returning void. */
-
-#define CALL_FN_v_v(fnptr)                                        \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_v(_junk,fnptr); } while (0)
-
-#define CALL_FN_v_W(fnptr, arg1)                                  \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
-
-#define CALL_FN_v_WW(fnptr, arg1,arg2)                            \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
-
-#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3)                      \
-   do { volatile unsigned long _junk;                             \
-        CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
-
-/* ------------------------- x86-linux ------------------------- */
-
-#if defined(PLAT_x86_linux)
-
-/* These regs are trashed by the hidden call.  No need to mention eax
-   as gcc can already see that, plus causes gcc to bomb. */
-#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
-
-/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
-   long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      __asm__ volatile(                                           \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $4, %%esp\n"                                       \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      __asm__ volatile(                                           \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $8, %%esp\n"                                       \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      __asm__ volatile(                                           \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $12, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      __asm__ volatile(                                           \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $16, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      __asm__ volatile(                                           \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $20, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      __asm__ volatile(                                           \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $24, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      __asm__ volatile(                                           \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $28, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      __asm__ volatile(                                           \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $32, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      __asm__ volatile(                                           \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $36, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      __asm__ volatile(                                           \
-         "pushl 40(%%eax)\n\t"                                    \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $40, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11)                          \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      __asm__ volatile(                                           \
-         "pushl 44(%%eax)\n\t"                                    \
-         "pushl 40(%%eax)\n\t"                                    \
-         "pushl 36(%%eax)\n\t"                                    \
-         "pushl 32(%%eax)\n\t"                                    \
-         "pushl 28(%%eax)\n\t"                                    \
-         "pushl 24(%%eax)\n\t"                                    \
-         "pushl 20(%%eax)\n\t"                                    \
-         "pushl 16(%%eax)\n\t"                                    \
-         "pushl 12(%%eax)\n\t"                                    \
-         "pushl 8(%%eax)\n\t"                                     \
-         "pushl 4(%%eax)\n\t"                                     \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $44, %%esp\n"                                      \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
-                                  arg6,arg7,arg8,arg9,arg10,      \
-                                  arg11,arg12)                    \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      _argvec[12] = (unsigned long)(arg12);                       \
-      __asm__ volatile(                                           \
-         "pushl 48(%%eax)\n\t"  /* arg12 */                       \
-         "pushl 44(%%eax)\n\t"  /* arg11 */                       \
-         "pushl 40(%%eax)\n\t"  /* arg10 */                       \
-         "pushl 36(%%eax)\n\t"  /* arg9 */                        \
-         "pushl 32(%%eax)\n\t"  /* arg8 */                        \
-         "pushl 28(%%eax)\n\t"  /* arg7 */                        \
-         "pushl 24(%%eax)\n\t"  /* arg6 */                        \
-         "pushl 20(%%eax)\n\t"  /* arg5 */                        \
-         "pushl 16(%%eax)\n\t"  /* arg4 */                        \
-         "pushl 12(%%eax)\n\t"  /* arg3 */                        \
-         "pushl 8(%%eax)\n\t"  /* arg2 */                         \
-         "pushl 4(%%eax)\n\t"  /* arg1 */                         \
-         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
-         VALGRIND_CALL_NOREDIR_EAX                                \
-         "addl $48, %%esp\n"  /* pop 12 stack args */             \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_x86_linux */
-
-/* ------------------------ amd64-linux ------------------------ */
-
-#if defined(PLAT_amd64_linux)
-
-/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
-
-/* Clobber list for the hidden call; rax omitted, presumably as it is the "a" operand. */
-#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi",       \
-                            "rdi", "r8", "r9", "r10", "r11"
-
-/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
-   long) == 8. */
-
-/* NB 9 Sept 07.  There is a nasty kludge here in all these CALL_FN_
-   macros.  In order not to trash the stack redzone, we need to drop
-   %rsp by 128 before the hidden call, and restore afterwards.  The
-   nastiness is that it is only by luck that the stack still appears
-   to be unwindable during the hidden call - since then the behaviour
-   of any routine using this macro does not match what the CFI data
-   says.  Sigh.
-
-   Why is this important?  Imagine that a wrapper has a stack
-   allocated local, and passes to the hidden call, a pointer to it.
-   Because gcc does not know about the hidden call, it may allocate
-   that local in the redzone.  Unfortunately the hidden call may then
-   trash it before it comes to use it.  So we must step clear of the
-   redzone, for the duration of the hidden call, to make it safe.
-
-   Probably the same problem afflicts the other redzone-style ABIs too
-   (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
-   self describing (none of this CFI nonsense) so at least messing
-   with the stack pointer doesn't give a danger of non-unwindable
-   stack. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $128,%%rsp\n\t"  /* restore only after call */     \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $8, %%rsp\n"  /* pop 1 stack arg */                \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 64(%%rax)\n\t"  /* arg8 (stack) */                \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $16, %%rsp\n"  /* pop 2 stack args */              \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 72(%%rax)\n\t"  /* arg9 (stack) */                \
-         "pushq 64(%%rax)\n\t"  /* arg8 (stack) */                \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $24, %%rsp\n"  /* pop 3 stack args */              \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 80(%%rax)\n\t"  /* arg10 (stack) */               \
-         "pushq 72(%%rax)\n\t"  /* arg9 (stack) */                \
-         "pushq 64(%%rax)\n\t"  /* arg8 (stack) */                \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $32, %%rsp\n"  /* pop 4 stack args */              \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 88(%%rax)\n\t"  /* arg11 (stack) */               \
-         "pushq 80(%%rax)\n\t"  /* arg10 (stack) */               \
-         "pushq 72(%%rax)\n\t"  /* arg9 (stack) */                \
-         "pushq 64(%%rax)\n\t"  /* arg8 (stack) */                \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $40, %%rsp\n"  /* pop 5 stack args */              \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);                         \
-      _argvec[2] = (unsigned long)(arg2);                         \
-      _argvec[3] = (unsigned long)(arg3);                         \
-      _argvec[4] = (unsigned long)(arg4);                         \
-      _argvec[5] = (unsigned long)(arg5);                         \
-      _argvec[6] = (unsigned long)(arg6);                         \
-      _argvec[7] = (unsigned long)(arg7);                         \
-      _argvec[8] = (unsigned long)(arg8);                         \
-      _argvec[9] = (unsigned long)(arg9);                         \
-      _argvec[10] = (unsigned long)(arg10);                       \
-      _argvec[11] = (unsigned long)(arg11);                       \
-      _argvec[12] = (unsigned long)(arg12);                       \
-      __asm__ volatile(                                           \
-         "subq $128,%%rsp\n\t"  /* step clear of redzone */       \
-         "pushq 96(%%rax)\n\t"  /* arg12 (stack) */               \
-         "pushq 88(%%rax)\n\t"  /* arg11 (stack) */               \
-         "pushq 80(%%rax)\n\t"  /* arg10 (stack) */               \
-         "pushq 72(%%rax)\n\t"  /* arg9 (stack) */                \
-         "pushq 64(%%rax)\n\t"  /* arg8 (stack) */                \
-         "pushq 56(%%rax)\n\t"  /* arg7 (stack) */                \
-         "movq 48(%%rax), %%r9\n\t"  /* arg6->%r9 */              \
-         "movq 40(%%rax), %%r8\n\t"  /* arg5->%r8 */              \
-         "movq 32(%%rax), %%rcx\n\t"  /* arg4->%rcx */            \
-         "movq 24(%%rax), %%rdx\n\t"  /* arg3->%rdx */            \
-         "movq 16(%%rax), %%rsi\n\t"  /* arg2->%rsi */            \
-         "movq 8(%%rax), %%rdi\n\t"  /* arg1->%rdi */             \
-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
-         VALGRIND_CALL_NOREDIR_RAX                                \
-         "addq $48, %%rsp\n"  /* pop 6 stack args */              \
-         "addq $128,%%rsp\n\t"  /* restore %rsp */                \
-         : /*out*/   "=a" (_res)                                  \
-         : /*in*/    "a" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_amd64_linux */
-
-/* ------------------------ ppc32-linux ------------------------ */
-
-#if defined(PLAT_ppc32_linux)
-
-/* This is useful for finding out about the on-stack stuff:
-
-   extern int f9  ( int,int,int,int,int,int,int,int,int );
-   extern int f10 ( int,int,int,int,int,int,int,int,int,int );
-   extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
-   extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
-
-   int g9 ( void ) {
-      return f9(11,22,33,44,55,66,77,88,99);
-   }
-   int g10 ( void ) {
-      return f10(11,22,33,44,55,66,77,88,99,110);
-   }
-   int g11 ( void ) {
-      return f11(11,22,33,44,55,66,77,88,99,110,121);
-   }
-   int g12 ( void ) {
-      return f12(11,22,33,44,55,66,77,88,99,110,121,132);
-   }
-*/
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* Asm clobber list: these regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* These CALL_FN_ macros assume that on ppc32-linux, 
-   sizeof(unsigned long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[1];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"  /* &_argvec[0]->r11 */                   \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"  /* r3->_res */                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do { /* hidden call to orig's target; result -> lval */        \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[2];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;  /* target */     \
-      _argvec[1] = (unsigned long)(arg1);  /* cast whole expr */  \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"  /* &_argvec[0]->r11 */                   \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"  /* r3->_res */                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[4];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[5];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[6];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[7];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[8];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[9];                          \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[10];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-16\n\t"                                       \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,16\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[11];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-16\n\t"                                       \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,16\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[12];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      _argvec[11] = (unsigned long)arg11;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-32\n\t"                                       \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,16(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,32\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[13];                         \
-      volatile unsigned long _res;                                \
-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
-      _argvec[1] = (unsigned long)arg1;                           \
-      _argvec[2] = (unsigned long)arg2;                           \
-      _argvec[3] = (unsigned long)arg3;                           \
-      _argvec[4] = (unsigned long)arg4;                           \
-      _argvec[5] = (unsigned long)arg5;                           \
-      _argvec[6] = (unsigned long)arg6;                           \
-      _argvec[7] = (unsigned long)arg7;                           \
-      _argvec[8] = (unsigned long)arg8;                           \
-      _argvec[9] = (unsigned long)arg9;                           \
-      _argvec[10] = (unsigned long)arg10;                         \
-      _argvec[11] = (unsigned long)arg11;                         \
-      _argvec[12] = (unsigned long)arg12;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "addi 1,1,-32\n\t"                                       \
-         /* arg12 */                                              \
-         "lwz 3,48(11)\n\t"                                       \
-         "stw 3,20(1)\n\t"                                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,16(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,12(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,8(1)\n\t"                                         \
-         /* args1-8 */                                            \
-         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
-         "lwz 4,8(11)\n\t"                                        \
-         "lwz 5,12(11)\n\t"                                       \
-         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
-         "lwz 7,20(11)\n\t"                                       \
-         "lwz 8,24(11)\n\t"                                       \
-         "lwz 9,28(11)\n\t"                                       \
-         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
-         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "addi 1,1,32\n\t"                                        \
-         "mr %0,3"                                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc32_linux */
-
-/* ------------------------ ppc64-linux ------------------------ */
-
-#if defined(PLAT_ppc64_linux)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
-   long) == 8. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)" /* restore tocptr */                      \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-128\n\t"  /* expand stack frame */            \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,128"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-128\n\t"  /* expand stack frame */            \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,128"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-144\n\t"  /* expand stack frame */            \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,144"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         "std 2,-16(11)\n\t"  /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "addi 1,1,-144\n\t"  /* expand stack frame */            \
-         /* arg12 */                                              \
-         "ld  3,96(11)\n\t"                                       \
-         "std 3,136(1)\n\t"                                       \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         "addi 1,1,144"     /* restore frame */                   \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc64_linux */
-
-/* ------------------------ ppc32-aix5 ------------------------- */
-
-#if defined(PLAT_ppc32_aix5)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* Expand the stack frame, copying enough info that unwinding
-   still works.  Trashes r3. */
-
-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
-         "addi 1,1,-" #_n_fr "\n\t"                               \
-         "lwz  3," #_n_fr "(1)\n\t"                               \
-         "stw  3,0(1)\n\t"
-
-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
-         "addi 1,1," #_n_fr "\n\t"
-
-/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
-   long) == 4. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t" /* arg2->r4 */                       \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(64)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(64)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,64(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(72)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
-         /* arg12 */                                              \
-         "lwz 3,48(11)\n\t"                                       \
-         "stw 3,68(1)\n\t"                                        \
-         /* arg11 */                                              \
-         "lwz 3,44(11)\n\t"                                       \
-         "stw 3,64(1)\n\t"                                        \
-         /* arg10 */                                              \
-         "lwz 3,40(11)\n\t"                                       \
-         "stw 3,60(1)\n\t"                                        \
-         /* arg9 */                                               \
-         "lwz 3,36(11)\n\t"                                       \
-         "stw 3,56(1)\n\t"                                        \
-         /* args1-8 */                                            \
-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(72)                                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc32_aix5 */
-
-/* ------------------------ ppc64-aix5 ------------------------- */
-
-#if defined(PLAT_ppc64_aix5)
-
-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
-
-/* These regs are trashed by the hidden call. */
-#define __CALLER_SAVED_REGS                                       \
-   "lr", "ctr", "xer",                                            \
-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
-   "r11", "r12", "r13"
-
-/* Expand the stack frame, copying enough info that unwinding
-   still works.  Trashes r3. */
-
-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
-         "addi 1,1,-" #_n_fr "\n\t"                               \
-         "ld   3," #_n_fr "(1)\n\t"                               \
-         "std  3,0(1)\n\t"
-
-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
-         "addi 1,1," #_n_fr "\n\t"
-
-/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
-   long) == 8. */
-
-#define CALL_FN_W_v(lval, orig)                                   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+0];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1] = (unsigned long)_orig.r2;                       \
-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_W(lval, orig, arg1)                             \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+1];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+2];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+3];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+4];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+5];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+6];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7)                            \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+7];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8)                       \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+8];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
-                                 arg7,arg8,arg9)                  \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+9];                        \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(128)                                \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10)           \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+10];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(128)                                \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                  arg7,arg8,arg9,arg10,arg11)     \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+11];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(144)                                \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
-                                arg7,arg8,arg9,arg10,arg11,arg12) \
-   do {                                                           \
-      volatile OrigFn        _orig = (orig);                      \
-      volatile unsigned long _argvec[3+12];                       \
-      volatile unsigned long _res;                                \
-      /* _argvec[0] holds current r2 across the call */           \
-      _argvec[1]   = (unsigned long)_orig.r2;                     \
-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
-      _argvec[2+1] = (unsigned long)arg1;                         \
-      _argvec[2+2] = (unsigned long)arg2;                         \
-      _argvec[2+3] = (unsigned long)arg3;                         \
-      _argvec[2+4] = (unsigned long)arg4;                         \
-      _argvec[2+5] = (unsigned long)arg5;                         \
-      _argvec[2+6] = (unsigned long)arg6;                         \
-      _argvec[2+7] = (unsigned long)arg7;                         \
-      _argvec[2+8] = (unsigned long)arg8;                         \
-      _argvec[2+9] = (unsigned long)arg9;                         \
-      _argvec[2+10] = (unsigned long)arg10;                       \
-      _argvec[2+11] = (unsigned long)arg11;                       \
-      _argvec[2+12] = (unsigned long)arg12;                       \
-      __asm__ volatile(                                           \
-         "mr 11,%1\n\t"                                           \
-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
-         "std  2,-16(11)\n\t" /* save tocptr */                   \
-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
-         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
-         /* arg12 */                                              \
-         "ld  3,96(11)\n\t"                                       \
-         "std 3,136(1)\n\t"                                       \
-         /* arg11 */                                              \
-         "ld  3,88(11)\n\t"                                       \
-         "std 3,128(1)\n\t"                                       \
-         /* arg10 */                                              \
-         "ld  3,80(11)\n\t"                                       \
-         "std 3,120(1)\n\t"                                       \
-         /* arg9 */                                               \
-         "ld  3,72(11)\n\t"                                       \
-         "std 3,112(1)\n\t"                                       \
-         /* args1-8 */                                            \
-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
-         "mr 11,%1\n\t"                                           \
-         "mr %0,3\n\t"                                            \
-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
-         VG_CONTRACT_FRAME_BY(144)                                \
-         VG_CONTRACT_FRAME_BY(512)                                \
-         : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "r" (&_argvec[2])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
-      );                                                          \
-      lval = (__typeof__(lval)) _res;                             \
-   } while (0)
-
-#endif /* PLAT_ppc64_aix5 */
-
-
-/* ------------------------------------------------------------------ */
-/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS.               */
-/*                                                                    */
-/* ------------------------------------------------------------------ */
-
-/* Some request codes.  There are many more of these, but most are not
-   exposed to end-user view.  These are the public ones, all of the
-   form 0x1000 + small_number.
-
-   Core ones are in the range 0x00000000--0x0000ffff.  The non-public
-   ones start at 0x2000.
-*/
-
-/* These macros are used by tools -- they must be public, but don't
-   embed them into other programs. */
-#define VG_USERREQ_TOOL_BASE(a,b) \
-   ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16))
-#define VG_IS_TOOL_USERREQ(a, b, v) \
-   (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
-
-/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! 
-   This enum comprises an ABI exported by Valgrind to programs
-   which use client requests.  DO NOT CHANGE THE ORDER OF THESE
-   ENTRIES, NOR DELETE ANY -- add new ones at the end. */
-typedef
-   enum { VG_USERREQ__RUNNING_ON_VALGRIND  = 0x1001,
-          VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002,
-
-          /* These allow any function to be called from the simulated
-             CPU but run on the real CPU.  Nb: the first arg passed to
-             the function is always the ThreadId of the running
-             thread!  So CLIENT_CALL0 actually requires a 1 arg
-             function, etc. */
-          VG_USERREQ__CLIENT_CALL0 = 0x1101,
-          VG_USERREQ__CLIENT_CALL1 = 0x1102,
-          VG_USERREQ__CLIENT_CALL2 = 0x1103,
-          VG_USERREQ__CLIENT_CALL3 = 0x1104,
-
-          /* Can be useful in regression testing suites -- eg. can
-             send Valgrind's output to /dev/null and still count
-             errors. */
-          VG_USERREQ__COUNT_ERRORS = 0x1201,
-
-          /* These are useful and can be interpreted by any tool that
-             tracks malloc() et al, by using vg_replace_malloc.c. */
-          VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
-          VG_USERREQ__FREELIKE_BLOCK   = 0x1302,
-          /* Memory pool support. */
-          VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
-          VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
-          VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
-          VG_USERREQ__MEMPOOL_FREE     = 0x1306,
-          VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
-          VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
-          VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
-          VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
-
-          /* Allow printfs to valgrind log. */
-          VG_USERREQ__PRINTF           = 0x1401,
-          VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
-
-          /* Stack support. */
-          VG_USERREQ__STACK_REGISTER   = 0x1501,
-          VG_USERREQ__STACK_DEREGISTER = 0x1502,
-          VG_USERREQ__STACK_CHANGE     = 0x1503
-   } Vg_ClientRequest;
-
-#if !defined(__GNUC__)
-#  define __extension__ /* */
-#endif
-
-/* Returns the number of Valgrinds this code is running under.  That
-   is, 0 if running natively, 1 if running under Valgrind, 2 if
-   running under Valgrind which is running under another Valgrind,
-   etc. */
-#define RUNNING_ON_VALGRIND  __extension__                        \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */,          \
-                               VG_USERREQ__RUNNING_ON_VALGRIND,   \
-                               0, 0, 0, 0, 0);                    \
-    _qzz_res;                                                     \
-   })
-
-
-/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
-   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
-   since it provides a way to make sure valgrind will retranslate the
-   invalidated area.  Returns no value. */
-#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
-                               _qzz_addr, _qzz_len, 0, 0, 0);     \
-   }
-
-
-/* These requests are for getting Valgrind itself to print something.
-   Possibly with a backtrace.  This is a really ugly hack. */
-
-#if defined(NVALGRIND)
-
-#  define VALGRIND_PRINTF(...)
-#  define VALGRIND_PRINTF_BACKTRACE(...)
-
-#else /* NVALGRIND */
-
-/* Modern GCC will optimize the static routine out if unused,
-   and unused attribute will shut down warnings about it.  */
-static int VALGRIND_PRINTF(const char *format, ...)
-   __attribute__((format(__printf__, 1, 2), __unused__));
-static int
-VALGRIND_PRINTF(const char *format, ...)
-{
-   unsigned long _qzz_res;
-   va_list vargs;
-   va_start(vargs, format);
-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF,
-                              (unsigned long)format, (unsigned long)vargs, 
-                              0, 0, 0);
-   va_end(vargs);
-   return (int)_qzz_res;
-}
-
-static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
-   __attribute__((format(__printf__, 1, 2), __unused__));
-static int
-VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
-{
-   unsigned long _qzz_res;
-   va_list vargs;
-   va_start(vargs, format);
-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
-                              (unsigned long)format, (unsigned long)vargs, 
-                              0, 0, 0);
-   va_end(vargs);
-   return (int)_qzz_res;
-}
-
-#endif /* NVALGRIND */
-
-
-/* These requests allow control to move from the simulated CPU to the
-   real CPU, calling an arbitary function.
-   
-   Note that the current ThreadId is inserted as the first argument.
-   So this call:
-
-     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
-
-   requires f to have this signature:
-
-     Word f(Word tid, Word arg1, Word arg2)
-
-   where "Word" is a word-sized type.
-
-   Note that these client requests are not entirely reliable.  For example,
-   if you call a function with them that subsequently calls printf(),
-   there's a high chance Valgrind will crash.  Generally, your prospects of
-   these working are made higher if the called function does not refer to
-   any global variables, and does not refer to any libc or other functions
-   (printf et al).  Any kind of entanglement with libc or dynamic linking is
-   likely to have a bad outcome, for tricky reasons which we've grappled
-   with a lot in the past.
-*/
-#define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                          \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL0,          \
-                               _qyy_fn,                           \
-                               0, 0, 0, 0);                       \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)               \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL1,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, 0, 0, 0);               \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)    \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL2,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, _qyy_arg2, 0, 0);       \
-    _qyy_res;                                                     \
-   })
-
-#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
-   __extension__                                                  \
-   ({unsigned long _qyy_res;                                      \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__CLIENT_CALL3,          \
-                               _qyy_fn,                           \
-                               _qyy_arg1, _qyy_arg2,              \
-                               _qyy_arg3, 0);                     \
-    _qyy_res;                                                     \
-   })
-
-
-/* Counts the number of errors that have been recorded by a tool.  Nb:
-   the tool must record the errors with VG_(maybe_record_error)() or
-   VG_(unique_error)() for them to be counted. */
-#define VALGRIND_COUNT_ERRORS                                     \
-   __extension__                                                  \
-   ({unsigned int _qyy_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
-                               VG_USERREQ__COUNT_ERRORS,          \
-                               0, 0, 0, 0, 0);                    \
-    _qyy_res;                                                     \
-   })
-
-/* Mark a block of memory as having been allocated by a malloc()-like
-   function.  `addr' is the start of the usable block (ie. after any
-   redzone) `rzB' is redzone size if the allocator can apply redzones;
-   use '0' if not.  Adding redzones makes it more likely Valgrind will spot
-   block overruns.  `is_zeroed' indicates if the memory is zeroed, as it is
-   for calloc().  Put it immediately after the point where a block is
-   allocated. 
-   
-   If you're using Memcheck: If you're allocating memory via superblocks,
-   and then handing out small chunks of each superblock, if you don't have
-   redzones on your small blocks, it's worth marking the superblock with
-   VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are
-   detected.  But if you can put redzones on, it's probably better to not do
-   this, so that messages for small overruns are described in terms of the
-   small block rather than the superblock (but if you have a big overrun
-   that skips over a redzone, you could miss an error this way).  See
-   memcheck/tests/custom_alloc.c for an example.
-
-   WARNING: if your allocator uses malloc() or 'new' to allocate
-   superblocks, rather than mmap() or brk(), this will not work properly --
-   you'll likely get assertion failures during leak detection.  This is
-   because Valgrind doesn't like seeing overlapping heap blocks.  Sorry.
-
-   Nb: block must be freed via a free()-like function specified
-   with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */
-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)    \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MALLOCLIKE_BLOCK,      \
-                               addr, sizeB, rzB, is_zeroed, 0);   \
-   }
-
-/* Mark a block of memory as having been freed by a free()-like function.
-   `rzB' is redzone size;  it must match that given to
-   VALGRIND_MALLOCLIKE_BLOCK.  Memory not freed will be detected by the leak
-   checker.  Put it immediately after the point where the block is freed. */
-#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                        \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__FREELIKE_BLOCK,        \
-                               addr, rzB, 0, 0, 0);               \
-   }
-
-/* Create a memory pool. */
-#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed)             \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__CREATE_MEMPOOL,        \
-                               pool, rzB, is_zeroed, 0, 0);       \
-   }
-
-/* Destroy a memory pool. */
-#define VALGRIND_DESTROY_MEMPOOL(pool)                            \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__DESTROY_MEMPOOL,       \
-                               pool, 0, 0, 0, 0);                 \
-   }
-
-/* Associate a piece of memory with a memory pool. */
-#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size)                  \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_ALLOC,         \
-                               pool, addr, size, 0, 0);           \
-   }
-
-/* Disassociate a piece of memory from a memory pool. */
-#define VALGRIND_MEMPOOL_FREE(pool, addr)                         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_FREE,          \
-                               pool, addr, 0, 0, 0);              \
-   }
-
-/* Disassociate any pieces outside a particular range. */
-#define VALGRIND_MEMPOOL_TRIM(pool, addr, size)                   \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_TRIM,          \
-                               pool, addr, size, 0, 0);           \
-   }
-
-/* Resize and/or move a piece associated with a memory pool. */
-#define VALGRIND_MOVE_MEMPOOL(poolA, poolB)                       \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MOVE_MEMPOOL,          \
-                               poolA, poolB, 0, 0, 0);            \
-   }
-
-/* Resize and/or move a piece associated with a memory pool. */
-#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size)         \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_CHANGE,        \
-                               pool, addrA, addrB, size, 0);      \
-   }
-
-/* Return 1 if a mempool exists, else 0. */
-#define VALGRIND_MEMPOOL_EXISTS(pool)                             \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__MEMPOOL_EXISTS,        \
-                               pool, 0, 0, 0, 0);                 \
-    _qzz_res;                                                     \
-   })
-
-/* Mark a piece of memory as being a stack. Returns a stack id. */
-#define VALGRIND_STACK_REGISTER(start, end)                       \
-   ({unsigned int _qzz_res;                                       \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_REGISTER,        \
-                               start, end, 0, 0, 0);              \
-    _qzz_res;                                                     \
-   })
-
-/* Unmark the piece of memory associated with a stack id as being a
-   stack. */
-#define VALGRIND_STACK_DEREGISTER(id)                             \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_DEREGISTER,      \
-                               id, 0, 0, 0, 0);                   \
-   }
-
-/* Change the start and end address of the stack id. */
-#define VALGRIND_STACK_CHANGE(id, start, end)                     \
-   {unsigned int _qzz_res;                                        \
-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
-                               VG_USERREQ__STACK_CHANGE,          \
-                               id, start, end, 0, 0);             \
-   }
-
-
-#undef PLAT_x86_linux
-#undef PLAT_amd64_linux
-#undef PLAT_ppc32_linux
-#undef PLAT_ppc64_linux
-#undef PLAT_ppc32_aix5
-#undef PLAT_ppc64_aix5
-
-#endif   /* __VALGRIND_H */
diff --git a/contrib/libtcmalloc/src/thread_cache.cc b/contrib/libtcmalloc/src/thread_cache.cc
deleted file mode 100644
index 81b3694d563..00000000000
--- a/contrib/libtcmalloc/src/thread_cache.cc
+++ /dev/null
@@ -1,479 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Ken Ashcraft <opensource@google.com>
-
-#include "config.h"
-#include "thread_cache.h"
-#include <errno.h>
-#include <string.h>                     // for memcpy
-#include <algorithm>                    // for max, min
-#include "base/commandlineflags.h"      // for SpinLockHolder
-#include "base/spinlock.h"              // for SpinLockHolder
-#include "getenv_safe.h"                // for TCMallocGetenvSafe
-#include "central_freelist.h"           // for CentralFreeListPadded
-#include "maybe_threads.h"
-
-using std::min;
-using std::max;
-
-// Note: this is initialized manually in InitModule to ensure that
-// it's configured at right time
-//
-// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
-//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
-//                         kDefaultOverallThreadCacheSize),
-//              "Bound on the total amount of bytes allocated to "
-//              "thread caches. This bound is not strict, so it is possible "
-//              "for the cache to go over this bound in certain circumstances. "
-//              "Maximum value of this flag is capped to 1 GB.");
-
-
-namespace tcmalloc {
-
-static bool phinited = false;
-
-volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
-size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
-ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
-PageHeapAllocator<ThreadCache> threadcache_allocator;
-ThreadCache* ThreadCache::thread_heaps_ = NULL;
-int ThreadCache::thread_heap_count_ = 0;
-ThreadCache* ThreadCache::next_memory_steal_ = NULL;
-#ifdef HAVE_TLS
-__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
-    ATTR_INITIAL_EXEC
-    = {0, 0};
-#endif
-bool ThreadCache::tsd_inited_ = false;
-pthread_key_t ThreadCache::heap_key_;
-
-void ThreadCache::Init(pthread_t tid) {
-  size_ = 0;
-
-  max_size_ = 0;
-  IncreaseCacheLimitLocked();
-  if (max_size_ == 0) {
-    // There isn't enough memory to go around.  Just give the minimum to
-    // this thread.
-    max_size_ = kMinThreadCacheSize;
-
-    // Take unclaimed_cache_space_ negative.
-    unclaimed_cache_space_ -= kMinThreadCacheSize;
-    ASSERT(unclaimed_cache_space_ < 0);
-  }
-
-  next_ = NULL;
-  prev_ = NULL;
-  tid_  = tid;
-  in_setspecific_ = false;
-  for (size_t cl = 0; cl < kNumClasses; ++cl) {
-    list_[cl].Init();
-  }
-
-  uint32_t sampler_seed;
-  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
-  sampler_.Init(sampler_seed);
-}
-
-void ThreadCache::Cleanup() {
-  // Put unused memory back into central cache
-  for (int cl = 0; cl < kNumClasses; ++cl) {
-    if (list_[cl].length() > 0) {
-      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
-    }
-  }
-}
-
-// Remove some objects of class "cl" from central cache and add to thread heap.
-// On success, return the first object for immediate use; otherwise return NULL.
-void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
-  FreeList* list = &list_[cl];
-  ASSERT(list->empty());
-  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
-
-  const int num_to_move = min<int>(list->max_length(), batch_size);
-  void *start, *end;
-  int fetch_count = Static::central_cache()[cl].RemoveRange(
-      &start, &end, num_to_move);
-
-  ASSERT((start == NULL) == (fetch_count == 0));
-  if (--fetch_count >= 0) {
-    size_ += byte_size * fetch_count;
-    list->PushRange(fetch_count, SLL_Next(start), end);
-  }
-
-  // Increase max length slowly up to batch_size.  After that,
-  // increase by batch_size in one shot so that the length is a
-  // multiple of batch_size.
-  if (list->max_length() < batch_size) {
-    list->set_max_length(list->max_length() + 1);
-  } else {
-    // Don't let the list get too long.  In 32 bit builds, the length
-    // is represented by a 16 bit int, so we need to watch out for
-    // integer overflow.
-    int new_length = min<int>(list->max_length() + batch_size,
-                              kMaxDynamicFreeListLength);
-    // The list's max_length must always be a multiple of batch_size,
-    // and kMaxDynamicFreeListLength is not necessarily a multiple
-    // of batch_size.
-    new_length -= new_length % batch_size;
-    ASSERT(new_length % batch_size == 0);
-    list->set_max_length(new_length);
-  }
-  return start;
-}
-
-void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
-  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
-  ReleaseToCentralCache(list, cl, batch_size);
-
-  // If the list is too long, we need to transfer some number of
-  // objects to the central cache.  Ideally, we would transfer
-  // num_objects_to_move, so the code below tries to make max_length
-  // converge on num_objects_to_move.
-
-  if (list->max_length() < batch_size) {
-    // Slow start the max_length so we don't overreserve.
-    list->set_max_length(list->max_length() + 1);
-  } else if (list->max_length() > batch_size) {
-    // If we consistently go over max_length, shrink max_length.  If we don't
-    // shrink it, some amount of memory will always stay in this freelist.
-    list->set_length_overages(list->length_overages() + 1);
-    if (list->length_overages() > kMaxOverages) {
-      ASSERT(list->max_length() > batch_size);
-      list->set_max_length(list->max_length() - batch_size);
-      list->set_length_overages(0);
-    }
-  }
-}
-
-// Remove some objects of class "cl" from thread heap and add to central cache
-void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
-  ASSERT(src == &list_[cl]);
-  if (N > src->length()) N = src->length();
-  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);
-
-  // We return prepackaged chains of the correct size to the central cache.
-  // TODO: Use the same format internally in the thread caches?
-  int batch_size = Static::sizemap()->num_objects_to_move(cl);
-  while (N > batch_size) {
-    void *tail, *head;
-    src->PopRange(batch_size, &head, &tail);
-    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
-    N -= batch_size;
-  }
-  void *tail, *head;
-  src->PopRange(N, &head, &tail);
-  Static::central_cache()[cl].InsertRange(head, tail, N);
-  size_ -= delta_bytes;
-}
-
-// Release idle memory to the central cache
-void ThreadCache::Scavenge() {
-  // If the low-water mark for the free list is L, it means we would
-  // not have had to allocate anything from the central cache even if
-  // we had reduced the free list size by L.  We aim to get closer to
-  // that situation by dropping L/2 nodes from the free list.  This
-  // may not release much memory, but if so we will call scavenge again
-  // pretty soon and the low-water marks will be high on that call.
-  for (int cl = 0; cl < kNumClasses; cl++) {
-    FreeList* list = &list_[cl];
-    const int lowmark = list->lowwatermark();
-    if (lowmark > 0) {
-      const int drop = (lowmark > 1) ? lowmark/2 : 1;
-      ReleaseToCentralCache(list, cl, drop);
-
-      // Shrink the max length if it isn't used.  Only shrink down to
-      // batch_size -- if the thread was active enough to get the max_length
-      // above batch_size, it will likely be that active again.  If
-      // max_length shinks below batch_size, the thread will have to
-      // go through the slow-start behavior again.  The slow-start is useful
-      // mainly for threads that stay relatively idle for their entire
-      // lifetime.
-      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
-      if (list->max_length() > batch_size) {
-        list->set_max_length(
-            max<int>(list->max_length() - batch_size, batch_size));
-      }
-    }
-    list->clear_lowwatermark();
-  }
-
-  IncreaseCacheLimit();
-}
-
-void ThreadCache::IncreaseCacheLimit() {
-  SpinLockHolder h(Static::pageheap_lock());
-  IncreaseCacheLimitLocked();
-}
-
-void ThreadCache::IncreaseCacheLimitLocked() {
-  if (unclaimed_cache_space_ > 0) {
-    // Possibly make unclaimed_cache_space_ negative.
-    unclaimed_cache_space_ -= kStealAmount;
-    max_size_ += kStealAmount;
-    return;
-  }
-  // Don't hold pageheap_lock too long.  Try to steal from 10 other
-  // threads before giving up.  The i < 10 condition also prevents an
-  // infinite loop in case none of the existing thread heaps are
-  // suitable places to steal from.
-  for (int i = 0; i < 10;
-       ++i, next_memory_steal_ = next_memory_steal_->next_) {
-    // Reached the end of the linked list.  Start at the beginning.
-    if (next_memory_steal_ == NULL) {
-      ASSERT(thread_heaps_ != NULL);
-      next_memory_steal_ = thread_heaps_;
-    }
-    if (next_memory_steal_ == this ||
-        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
-      continue;
-    }
-    next_memory_steal_->max_size_ -= kStealAmount;
-    max_size_ += kStealAmount;
-
-    next_memory_steal_ = next_memory_steal_->next_;
-    return;
-  }
-}
-
-int ThreadCache::GetSamplePeriod() {
-  return sampler_.GetSamplePeriod();
-}
-
-void ThreadCache::InitModule() {
-  SpinLockHolder h(Static::pageheap_lock());
-  if (!phinited) {
-    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
-    if (tcb) {
-      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
-    }
-    Static::InitStaticVars();
-    threadcache_allocator.Init();
-    phinited = 1;
-  }
-}
-
-void ThreadCache::InitTSD() {
-  ASSERT(!tsd_inited_);
-  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
-  tsd_inited_ = true;
-
-#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
-  // We may have used a fake pthread_t for the main thread.  Fix it.
-  pthread_t zero;
-  memset(&zero, 0, sizeof(zero));
-  SpinLockHolder h(Static::pageheap_lock());
-  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
-    if (h->tid_ == zero) {
-      h->tid_ = pthread_self();
-    }
-  }
-#endif
-}
-
-ThreadCache* ThreadCache::CreateCacheIfNecessary() {
-  // Initialize per-thread data if necessary
-  ThreadCache* heap = NULL;
-  {
-    SpinLockHolder h(Static::pageheap_lock());
-    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
-    // calling pthread routines (even pthread_self) too early could
-    // cause a segfault.  Since we can call pthreads quite early, we
-    // have to protect against that in such situations by making a
-    // 'fake' pthread.  This is not ideal since it doesn't work well
-    // when linking tcmalloc statically with apps that create threads
-    // before main, so we only do it if we have to.
-#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
-    pthread_t me;
-    if (!tsd_inited_) {
-      memset(&me, 0, sizeof(me));
-    } else {
-      me = pthread_self();
-    }
-#else
-    const pthread_t me = pthread_self();
-#endif
-
-    // This may be a recursive malloc call from pthread_setspecific()
-    // In that case, the heap for this thread has already been created
-    // and added to the linked list.  So we search for that first.
-    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
-      if (h->tid_ == me) {
-        heap = h;
-        break;
-      }
-    }
-
-    if (heap == NULL) heap = NewHeap(me);
-  }
-
-  // We call pthread_setspecific() outside the lock because it may
-  // call malloc() recursively.  We check for the recursive call using
-  // the "in_setspecific_" flag so that we can avoid calling
-  // pthread_setspecific() if we are already inside pthread_setspecific().
-  if (!heap->in_setspecific_ && tsd_inited_) {
-    heap->in_setspecific_ = true;
-    perftools_pthread_setspecific(heap_key_, heap);
-#ifdef HAVE_TLS
-    // Also keep a copy in __thread for faster retrieval
-    threadlocal_data_.heap = heap;
-    SetMinSizeForSlowPath(kMaxSize + 1);
-#endif
-    heap->in_setspecific_ = false;
-  }
-  return heap;
-}
-
-ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
-  // Create the heap and add it to the linked list
-  ThreadCache *heap = threadcache_allocator.New();
-  heap->Init(tid);
-  heap->next_ = thread_heaps_;
-  heap->prev_ = NULL;
-  if (thread_heaps_ != NULL) {
-    thread_heaps_->prev_ = heap;
-  } else {
-    // This is the only thread heap at the momment.
-    ASSERT(next_memory_steal_ == NULL);
-    next_memory_steal_ = heap;
-  }
-  thread_heaps_ = heap;
-  thread_heap_count_++;
-  return heap;
-}
-
-void ThreadCache::BecomeIdle() {
-  if (!tsd_inited_) return;              // No caches yet
-  ThreadCache* heap = GetThreadHeap();
-  if (heap == NULL) return;             // No thread cache to remove
-  if (heap->in_setspecific_) return;    // Do not disturb the active caller
-
-  heap->in_setspecific_ = true;
-  perftools_pthread_setspecific(heap_key_, NULL);
-#ifdef HAVE_TLS
-  // Also update the copy in __thread
-  threadlocal_data_.heap = NULL;
-  SetMinSizeForSlowPath(0);
-#endif
-  heap->in_setspecific_ = false;
-  if (GetThreadHeap() == heap) {
-    // Somehow heap got reinstated by a recursive call to malloc
-    // from pthread_setspecific.  We give up in this case.
-    return;
-  }
-
-  // We can now get rid of the heap
-  DeleteCache(heap);
-}
-
-void ThreadCache::BecomeTemporarilyIdle() {
-  ThreadCache* heap = GetCacheIfPresent();
-  if (heap)
-    heap->Cleanup();
-}
-
-void ThreadCache::DestroyThreadCache(void* ptr) {
-  // Note that "ptr" cannot be NULL since pthread promises not
-  // to invoke the destructor on NULL values, but for safety,
-  // we check anyway.
-  if (ptr == NULL) return;
-#ifdef HAVE_TLS
-  // Prevent fast path of GetThreadHeap() from returning heap.
-  threadlocal_data_.heap = NULL;
-  SetMinSizeForSlowPath(0);
-#endif
-  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
-}
-
-void ThreadCache::DeleteCache(ThreadCache* heap) {
-  // Remove all memory from heap
-  heap->Cleanup();
-
-  // Remove from linked list
-  SpinLockHolder h(Static::pageheap_lock());
-  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
-  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
-  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
-  thread_heap_count_--;
-
-  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
-  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
-  unclaimed_cache_space_ += heap->max_size_;
-
-  threadcache_allocator.Delete(heap);
-}
-
-void ThreadCache::RecomputePerThreadCacheSize() {
-  // Divide available space across threads
-  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
-  size_t space = overall_thread_cache_size_ / n;
-
-  // Limit to allowed range
-  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
-  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
-
-  double ratio = space / max<double>(1, per_thread_cache_size_);
-  size_t claimed = 0;
-  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
-    // Increasing the total cache size should not circumvent the
-    // slow-start growth of max_size_.
-    if (ratio < 1.0) {
-        h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
-    }
-    claimed += h->max_size_;
-  }
-  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
-  per_thread_cache_size_ = space;
-}
-
-void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
-  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
-    *total_bytes += h->Size();
-    if (class_count) {
-      for (int cl = 0; cl < kNumClasses; ++cl) {
-        class_count[cl] += h->freelist_length(cl);
-      }
-    }
-  }
-}
-
-void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
-  // Clip the value to a reasonable range
-  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
-  if (new_size > (1<<30)) new_size = (1<<30);     // Limit to 1GB
-  overall_thread_cache_size_ = new_size;
-
-  RecomputePerThreadCacheSize();
-}
-
-}  // namespace tcmalloc
diff --git a/contrib/libtcmalloc/src/thread_cache.h b/contrib/libtcmalloc/src/thread_cache.h
deleted file mode 100644
index ff7ab1ae77d..00000000000
--- a/contrib/libtcmalloc/src/thread_cache.h
+++ /dev/null
@@ -1,474 +0,0 @@
-// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-
-#ifndef TCMALLOC_THREAD_CACHE_H_
-#define TCMALLOC_THREAD_CACHE_H_
-
-#include "config.h"
-#ifdef HAVE_PTHREAD
-#include <pthread.h>                    // for pthread_t, pthread_key_t
-#endif
-#include <stddef.h>                     // for size_t, NULL
-#ifdef HAVE_STDINT_H
-#include <stdint.h>                     // for uint32_t, uint64_t
-#endif
-#include <sys/types.h>                  // for ssize_t
-#include "base/commandlineflags.h"
-#include "common.h"
-#include "linked_list.h"
-#include "maybe_threads.h"
-#include "page_heap_allocator.h"
-#include "sampler.h"
-#include "static_vars.h"
-
-#include "common.h"            // for SizeMap, kMaxSize, etc
-#include "internal_logging.h"  // for ASSERT, etc
-#include "linked_list.h"       // for SLL_Pop, SLL_PopRange, etc
-#include "page_heap_allocator.h"  // for PageHeapAllocator
-#include "sampler.h"           // for Sampler
-#include "static_vars.h"       // for Static
-
-DECLARE_int64(tcmalloc_sample_parameter);
-
-namespace tcmalloc {
-
-//-------------------------------------------------------------------
-// Data kept per thread
-//-------------------------------------------------------------------
-
-class ThreadCache {
- public:
-#ifdef HAVE_TLS
-  enum { have_tls = true };
-#else
-  enum { have_tls = false };
-#endif
-
-  // All ThreadCache objects are kept in a linked list (for stats collection)
-  ThreadCache* next_;
-  ThreadCache* prev_;
-
-  void Init(pthread_t tid);
-  void Cleanup();
-
-  // Accessors (mostly just for printing stats)
-  int freelist_length(size_t cl) const { return list_[cl].length(); }
-
-  // Total byte size in cache
-  size_t Size() const { return size_; }
-
-  // Allocate an object of the given size and class. The size given
-  // must be the same as the size of the class in the size map.
-  void* Allocate(size_t size, size_t cl);
-  void Deallocate(void* ptr, size_t size_class);
-
-  void Scavenge();
-
-  int GetSamplePeriod();
-
-  // Record allocation of "k" bytes.  Return true iff allocation
-  // should be sampled
-  bool SampleAllocation(size_t k);
-
-  static void         InitModule();
-  static void         InitTSD();
-  static ThreadCache* GetThreadHeap();
-  static ThreadCache* GetCache();
-  static ThreadCache* GetCacheIfPresent();
-  static ThreadCache* GetCacheWhichMustBePresent();
-  static ThreadCache* CreateCacheIfNecessary();
-  static void         BecomeIdle();
-  static void         BecomeTemporarilyIdle();
-  static size_t       MinSizeForSlowPath();
-  static void         SetMinSizeForSlowPath(size_t size);
-  static void         SetUseEmergencyMalloc();
-  static void         ResetUseEmergencyMalloc();
-  static bool         IsUseEmergencyMalloc();
-
-  static bool IsFastPathAllowed() { return MinSizeForSlowPath() != 0; }
-
-  // Return the number of thread heaps in use.
-  static inline int HeapsInUse();
-
-  // Adds to *total_bytes the total number of bytes used by all thread heaps.
-  // Also, if class_count is not NULL, it must be an array of size kNumClasses,
-  // and this function will increment each element of class_count by the number
-  // of items in all thread-local freelists of the corresponding size class.
-  // REQUIRES: Static::pageheap_lock is held.
-  static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);
-
-  // Sets the total thread cache size to new_size, recomputing the
-  // individual thread cache sizes as necessary.
-  // REQUIRES: Static::pageheap lock is held.
-  static void set_overall_thread_cache_size(size_t new_size);
-  static size_t overall_thread_cache_size() {
-    return overall_thread_cache_size_;
-  }
-
- private:
-  class FreeList {
-   private:
-    void*    list_;       // Linked list of nodes
-
-#ifdef _LP64
-    // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
-    uint32_t length_;      // Current length.
-    uint32_t lowater_;     // Low water mark for list length.
-    uint32_t max_length_;  // Dynamic max list length based on usage.
-    // Tracks the number of times a deallocation has caused
-    // length_ > max_length_.  After the kMaxOverages'th time, max_length_
-    // shrinks and length_overages_ is reset to zero.
-    uint32_t length_overages_;
-#else
-    // If we aren't using 64-bit pointers then pack these into less space.
-    uint16_t length_;
-    uint16_t lowater_;
-    uint16_t max_length_;
-    uint16_t length_overages_;
-#endif
-
-   public:
-    void Init() {
-      list_ = NULL;
-      length_ = 0;
-      lowater_ = 0;
-      max_length_ = 1;
-      length_overages_ = 0;
-    }
-
-    // Return current length of list
-    size_t length() const {
-      return length_;
-    }
-
-    // Return the maximum length of the list.
-    size_t max_length() const {
-      return max_length_;
-    }
-
-    // Set the maximum length of the list.  If 'new_max' > length(), the
-    // client is responsible for removing objects from the list.
-    void set_max_length(size_t new_max) {
-      max_length_ = new_max;
-    }
-
-    // Return the number of times that length() has gone over max_length().
-    size_t length_overages() const {
-      return length_overages_;
-    }
-
-    void set_length_overages(size_t new_count) {
-      length_overages_ = new_count;
-    }
-
-    // Is list empty?
-    bool empty() const {
-      return list_ == NULL;
-    }
-
-    // Low-water mark management
-    int lowwatermark() const { return lowater_; }
-    void clear_lowwatermark() { lowater_ = length_; }
-
-    void Push(void* ptr) {
-      SLL_Push(&list_, ptr);
-      length_++;
-    }
-
-    void* Pop() {
-      ASSERT(list_ != NULL);
-      length_--;
-      if (length_ < lowater_) lowater_ = length_;
-      return SLL_Pop(&list_);
-    }
-
-    void* Next() {
-      return SLL_Next(&list_);
-    }
-
-    void PushRange(int N, void *start, void *end) {
-      SLL_PushRange(&list_, start, end);
-      length_ += N;
-    }
-
-    void PopRange(int N, void **start, void **end) {
-      SLL_PopRange(&list_, N, start, end);
-      ASSERT(length_ >= N);
-      length_ -= N;
-      if (length_ < lowater_) lowater_ = length_;
-    }
-  };
-
-  // Gets and returns an object from the central cache, and, if possible,
-  // also adds some objects of that size class to this thread cache.
-  void* FetchFromCentralCache(size_t cl, size_t byte_size);
-
-  // Releases some number of items from src.  Adjusts the list's max_length
-  // to eventually converge on num_objects_to_move(cl).
-  void ListTooLong(FreeList* src, size_t cl);
-
-  // Releases N items from this thread cache.
-  void ReleaseToCentralCache(FreeList* src, size_t cl, int N);
-
-  // Increase max_size_ by reducing unclaimed_cache_space_ or by
-  // reducing the max_size_ of some other thread.  In both cases,
-  // the delta is kStealAmount.
-  void IncreaseCacheLimit();
-  // Same as above but requires Static::pageheap_lock() is held.
-  void IncreaseCacheLimitLocked();
-
-  // If TLS is available, we also store a copy of the per-thread object
-  // in a __thread variable since __thread variables are faster to read
-  // than pthread_getspecific().  We still need pthread_setspecific()
-  // because __thread variables provide no way to run cleanup code when
-  // a thread is destroyed.
-  // We also give a hint to the compiler to use the "initial exec" TLS
-  // model.  This is faster than the default TLS model, at the cost that
-  // you cannot dlopen this library.  (To see the difference, look at
-  // the CPU use of __tls_get_addr with and without this attribute.)
-  // Since we don't really use dlopen in google code -- and using dlopen
-  // on a malloc replacement is asking for trouble in any case -- that's
-  // a good tradeoff for us.
-#ifdef HAVE_TLS
-  struct ThreadLocalData {
-    ThreadCache* heap;
-    // min_size_for_slow_path is 0 if heap is NULL or kMaxSize + 1 otherwise.
-    // The latter is the common case and allows allocation to be faster
-    // than it would be otherwise: typically a single branch will
-    // determine that the requested allocation is no more than kMaxSize
-    // and we can then proceed, knowing that global and thread-local tcmalloc
-    // state is initialized.
-    size_t min_size_for_slow_path;
-
-    bool use_emergency_malloc;
-    size_t old_min_size_for_slow_path;
-  };
-  static __thread ThreadLocalData threadlocal_data_ ATTR_INITIAL_EXEC;
-#endif
-
-  // Thread-specific key.  Initialization here is somewhat tricky
-  // because some Linux startup code invokes malloc() before it
-  // is in a good enough state to handle pthread_keycreate().
-  // Therefore, we use TSD keys only after tsd_inited is set to true.
-  // Until then, we use a slow path to get the heap object.
-  static bool tsd_inited_;
-  static pthread_key_t heap_key_;
-
-  // Linked list of heap objects.  Protected by Static::pageheap_lock.
-  static ThreadCache* thread_heaps_;
-  static int thread_heap_count_;
-
-  // A pointer to one of the objects in thread_heaps_.  Represents
-  // the next ThreadCache from which a thread over its max_size_ should
-  // steal memory limit.  Round-robin through all of the objects in
-  // thread_heaps_.  Protected by Static::pageheap_lock.
-  static ThreadCache* next_memory_steal_;
-
-  // Overall thread cache size.  Protected by Static::pageheap_lock.
-  static size_t overall_thread_cache_size_;
-
-  // Global per-thread cache size.  Writes are protected by
-  // Static::pageheap_lock.  Reads are done without any locking, which should be
-  // fine as long as size_t can be written atomically and we don't place
-  // invariants between this variable and other pieces of state.
-  static volatile size_t per_thread_cache_size_;
-
-  // Represents overall_thread_cache_size_ minus the sum of max_size_
-  // across all ThreadCaches.  Protected by Static::pageheap_lock.
-  static ssize_t unclaimed_cache_space_;
-
-  // This class is laid out with the most frequently used fields
-  // first so that hot elements are placed on the same cache line.
-
-  size_t        size_;                  // Combined size of data
-  size_t        max_size_;              // size_ > max_size_ --> Scavenge()
-
-  // We sample allocations, biased by the size of the allocation
-  Sampler       sampler_;               // A sampler
-
-  FreeList      list_[kNumClasses];     // Array indexed by size-class
-
-  pthread_t     tid_;                   // Which thread owns it
-  bool          in_setspecific_;        // In call to pthread_setspecific?
-
-  // Allocate a new heap. REQUIRES: Static::pageheap_lock is held.
-  static ThreadCache* NewHeap(pthread_t tid);
-
-  // Use only as pthread thread-specific destructor function.
-  static void DestroyThreadCache(void* ptr);
-
-  static void DeleteCache(ThreadCache* heap);
-  static void RecomputePerThreadCacheSize();
-
-  // Ensure that this class is cacheline-aligned. This is critical for
-  // performance, as false sharing would negate many of the benefits
-  // of a per-thread cache.
-} CACHELINE_ALIGNED;
-
-// Allocator for thread heaps
-// This is logically part of the ThreadCache class, but MSVC, at
-// least, does not like using ThreadCache as a template argument
-// before the class is fully defined.  So we put it outside the class.
-extern PageHeapAllocator<ThreadCache> threadcache_allocator;
-
-inline int ThreadCache::HeapsInUse() {
-  return threadcache_allocator.inuse();
-}
-
-inline bool ThreadCache::SampleAllocation(size_t k) {
-#ifndef NO_TCMALLOC_SAMPLES
-  return UNLIKELY(FLAGS_tcmalloc_sample_parameter > 0) && sampler_.SampleAllocation(k);
-#else
-  return false;
-#endif
-}
-
-inline void* ThreadCache::Allocate(size_t size, size_t cl) {
-  ASSERT(size <= kMaxSize);
-  ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));
-
-  FreeList* list = &list_[cl];
-  if (UNLIKELY(list->empty())) {
-    return FetchFromCentralCache(cl, size);
-  }
-  size_ -= size;
-  return list->Pop();
-}
-
-inline void ThreadCache::Deallocate(void* ptr, size_t cl) {
-  FreeList* list = &list_[cl];
-  size_ += Static::sizemap()->ByteSizeForClass(cl);
-  ssize_t size_headroom = max_size_ - size_ - 1;
-
-  // This catches back-to-back frees of allocs in the same size
-  // class. A more comprehensive (and expensive) test would be to walk
-  // the entire freelist. But this might be enough to find some bugs.
-  ASSERT(ptr != list->Next());
-
-  list->Push(ptr);
-  ssize_t list_headroom =
-      static_cast<ssize_t>(list->max_length()) - list->length();
-
-  // There are two relatively uncommon things that require further work.
-  // In the common case we're done, and in that case we need a single branch
-  // because of the bitwise-or trick that follows.
-  if (UNLIKELY((list_headroom | size_headroom) < 0)) {
-    if (list_headroom < 0) {
-      ListTooLong(list, cl);
-    }
-    if (size_ >= max_size_) Scavenge();
-  }
-}
-
-inline ThreadCache* ThreadCache::GetThreadHeap() {
-#ifdef HAVE_TLS
-  return threadlocal_data_.heap;
-#else
-  return reinterpret_cast<ThreadCache *>(
-      perftools_pthread_getspecific(heap_key_));
-#endif
-}
-
-inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() {
-#ifdef HAVE_TLS
-  ASSERT(threadlocal_data_.heap);
-  return threadlocal_data_.heap;
-#else
-  ASSERT(perftools_pthread_getspecific(heap_key_));
-  return reinterpret_cast<ThreadCache *>(
-      perftools_pthread_getspecific(heap_key_));
-#endif
-}
-
-inline ThreadCache* ThreadCache::GetCache() {
-  ThreadCache* ptr = NULL;
-  if (!tsd_inited_) {
-    InitModule();
-  } else {
-    ptr = GetThreadHeap();
-  }
-  if (ptr == NULL) ptr = CreateCacheIfNecessary();
-  return ptr;
-}
-
-// In deletion paths, we do not try to create a thread-cache.  This is
-// because we may be in the thread destruction code and may have
-// already cleaned up the cache for this thread.
-inline ThreadCache* ThreadCache::GetCacheIfPresent() {
-#ifndef HAVE_TLS
-  if (!tsd_inited_) return NULL;
-#endif
-  return GetThreadHeap();
-}
-
-inline size_t ThreadCache::MinSizeForSlowPath() {
-#ifdef HAVE_TLS
-  return threadlocal_data_.min_size_for_slow_path;
-#else
-  return 0;
-#endif
-}
-
-inline void ThreadCache::SetMinSizeForSlowPath(size_t size) {
-#ifdef HAVE_TLS
-  threadlocal_data_.min_size_for_slow_path = size;
-#endif
-}
-
-inline void ThreadCache::SetUseEmergencyMalloc() {
-#ifdef HAVE_TLS
-  threadlocal_data_.old_min_size_for_slow_path = threadlocal_data_.min_size_for_slow_path;
-  threadlocal_data_.min_size_for_slow_path = 0;
-  threadlocal_data_.use_emergency_malloc = true;
-#endif
-}
-
-inline void ThreadCache::ResetUseEmergencyMalloc() {
-#ifdef HAVE_TLS
-  threadlocal_data_.min_size_for_slow_path = threadlocal_data_.old_min_size_for_slow_path;
-  threadlocal_data_.use_emergency_malloc = false;
-#endif
-}
-
-inline bool ThreadCache::IsUseEmergencyMalloc() {
-#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC)
-  return UNLIKELY(threadlocal_data_.use_emergency_malloc);
-#else
-  return false;
-#endif
-}
-
-
-}  // namespace tcmalloc
-
-#endif  // TCMALLOC_THREAD_CACHE_H_
diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt
index 7a6648d8dc6..73bb0fc3beb 100644
--- a/contrib/libunwind-cmake/CMakeLists.txt
+++ b/contrib/libunwind-cmake/CMakeLists.txt
@@ -1,3 +1,6 @@
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
+
 set(LIBUNWIND_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libunwind)
 
 set(LIBUNWIND_CXX_SOURCES
@@ -30,7 +33,26 @@ add_library(unwind ${LIBUNWIND_SOURCES})
 
 target_include_directories(unwind SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBUNWIND_SOURCE_DIR}/include>)
 target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY)
-target_compile_options(unwind PRIVATE -fno-exceptions -funwind-tables -fno-sanitize=all -nostdinc++ -fno-rtti)
+# -nostdinc++ and -fno-rtti are passed only when compiling the C++ sources of this target.
+# The generator expression is quoted and ';'-separated so CMake receives it as a single argument.
+target_compile_options(unwind PRIVATE -fno-exceptions -funwind-tables -fno-sanitize=all "$<$<COMPILE_LANGUAGE:CXX>:-nostdinc++;-fno-rtti>")
+
+# Each warning below is probed in its positive form (-W<name>); if the compiler accepts it,
+# the matching -Wno-<name> option silences it for this target only (PRIVATE).
+check_c_compiler_flag(-Wunused-but-set-variable HAVE_WARNING_UNUSED_BUT_SET_VARIABLE)
+if (HAVE_WARNING_UNUSED_BUT_SET_VARIABLE)
+    target_compile_options(unwind PRIVATE -Wno-unused-but-set-variable)
+endif ()
+
+check_cxx_compiler_flag(-Wmissing-attributes HAVE_WARNING_MISSING_ATTRIBUTES)
+if (HAVE_WARNING_MISSING_ATTRIBUTES)
+    target_compile_options(unwind PRIVATE -Wno-missing-attributes)
+endif ()
+
+check_cxx_compiler_flag(-Wmaybe-uninitialized HAVE_WARNING_MAYBE_UNINITIALIZED)
+if (HAVE_WARNING_MAYBE_UNINITIALIZED)
+    target_compile_options(unwind PRIVATE -Wno-maybe-uninitialized)
+endif ()
 
 install(
     TARGETS unwind
diff --git a/contrib/llvm b/contrib/llvm
index 163def21781..778c297395b 160000
--- a/contrib/llvm
+++ b/contrib/llvm
@@ -1 +1 @@
-Subproject commit 163def217817c90fb982a6daf384744d8472b92b
+Subproject commit 778c297395b4a2dfd60e13969a0f9488bf2c16cf
diff --git a/contrib/orc-cmake/CMakeLists.txt b/contrib/orc-cmake/CMakeLists.txt
deleted file mode 100644
index 066ba00aede..00000000000
--- a/contrib/orc-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-# modifyed copy of contrib/orc/c++/src/CMakeLists.txt
-set(LIBRARY_INCLUDE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include)
-set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/src)
-
-set(PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIR})
-set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
-
-INCLUDE(CheckCXXSourceCompiles)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<fcntl.h>
-    #include<unistd.h>
-    int main(int,char*[]){
-      int f = open(\"/x/y\", O_RDONLY);
-      char buf[100];
-      return pread(f, buf, 100, 1000) == 0;
-    }"
-  HAS_PREAD
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<time.h>
-    int main(int,char*[]){
-      struct tm time2020;
-      return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020);
-    }"
-  HAS_STRPTIME
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<string>
-    int main(int,char* argv[]){
-      return static_cast<int>(std::stoll(argv[0]));
-    }"
-  HAS_STOLL
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<stdint.h>
-    #include<stdio.h>
-    int main(int,char*[]){
-      int64_t x = 1; printf(\"%lld\",x);
-    }"
-  INT64_IS_LL
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #ifdef __clang__
-      #pragma clang diagnostic push
-      #pragma clang diagnostic ignored \"-Wdeprecated\"
-      #pragma clang diagnostic pop
-   #elif defined(__GNUC__)
-      #pragma GCC diagnostic push
-      #pragma GCC diagnostic ignored \"-Wdeprecated\"
-      #pragma GCC diagnostic pop
-   #elif defined(_MSC_VER)
-      #pragma warning( push )
-      #pragma warning( disable : 4996 )
-      #pragma warning( pop )
-   #else
-     unknownCompiler!
-   #endif
-   int main(int, char *[]) {}"
-  HAS_DIAGNOSTIC_PUSH
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<cmath>
-    int main(int, char *[]) {
-      return std::isnan(1.0f);
-    }"
-  HAS_STD_ISNAN
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<mutex>
-    int main(int, char *[]) {
-       std::mutex test_mutex;
-       std::lock_guard<std::mutex> lock_mutex(test_mutex);
-    }"
-  HAS_STD_MUTEX
-)
-
-CHECK_CXX_SOURCE_COMPILES("
-    #include<string>
-    std::string func() {
-      std::string var = \"test\";
-      return std::move(var);
-    }
-    int main(int, char *[]) {}"
-  NEEDS_REDUNDANT_MOVE
-)
-
-INCLUDE(CheckCXXSourceRuns)
-
-CHECK_CXX_SOURCE_RUNS("
-    #include<time.h>
-    int main(int, char *[]) {
-      time_t t = -14210715; // 1969-07-20 12:34:45
-      struct tm *ptm = gmtime(&t);
-      return !(ptm && ptm->tm_year == 69);
-    }"
-  HAS_PRE_1970
-)
-
-CHECK_CXX_SOURCE_RUNS("
-    #include<stdlib.h>
-    #include<time.h>
-    int main(int, char *[]) {
-      setenv(\"TZ\", \"America/Los_Angeles\", 1);
-      tzset();
-      struct tm time2037;
-      struct tm time2038;
-      strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037);
-      strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038);
-      return mktime(&time2038) - mktime(&time2037) != 31536000;
-    }"
-  HAS_POST_2038
-)
-
-set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR})
-set(CMAKE_REQUIRED_LIBRARIES zlib)
-CHECK_CXX_SOURCE_COMPILES("
-    #define Z_PREFIX
-    #include<zlib.h>
-    z_stream strm;
-    int main(int, char *[]) {
-        deflateReset(&strm);
-    }"
-  NEEDS_Z_PREFIX
-)
-
-configure_file (
-  "${LIBRARY_DIR}/Adaptor.hh.in"
-  "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
-  )
-
-
-add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.cc
-   COMMAND ${PROTOBUF_EXECUTABLE}
-        -I${ClickHouse_SOURCE_DIR}/contrib/orc/proto
-        --cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
-        "${ClickHouse_SOURCE_DIR}/contrib/orc/proto/orc_proto.proto"
-)
-
-set(SOURCE_FILES
-  "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
-  ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h
-  ${LIBRARY_DIR}/io/InputStream.cc
-  ${LIBRARY_DIR}/io/OutputStream.cc
-  ${LIBRARY_DIR}/wrap/orc-proto-wrapper.cc
-  ${LIBRARY_DIR}/Adaptor.cc
-  ${LIBRARY_DIR}/ByteRLE.cc
-  ${LIBRARY_DIR}/ColumnPrinter.cc
-  ${LIBRARY_DIR}/ColumnReader.cc
-  ${LIBRARY_DIR}/ColumnWriter.cc
-  ${LIBRARY_DIR}/Common.cc
-  ${LIBRARY_DIR}/Compression.cc
-  ${LIBRARY_DIR}/Exceptions.cc
-  ${LIBRARY_DIR}/Int128.cc
-  ${LIBRARY_DIR}/LzoDecompressor.cc
-  ${LIBRARY_DIR}/MemoryPool.cc
-  ${LIBRARY_DIR}/OrcFile.cc
-  ${LIBRARY_DIR}/Reader.cc
-  ${LIBRARY_DIR}/RLEv1.cc
-  ${LIBRARY_DIR}/RLEv2.cc
-  ${LIBRARY_DIR}/RLE.cc
-  ${LIBRARY_DIR}/Statistics.cc
-  ${LIBRARY_DIR}/StripeStream.cc
-  ${LIBRARY_DIR}/Timezone.cc
-  ${LIBRARY_DIR}/TypeImpl.cc
-  ${LIBRARY_DIR}/Vector.cc
-  ${LIBRARY_DIR}/Writer.cc
-  )
-
-if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
-  set(SOURCE_FILES ${SOURCE_FILES} ${LIBRARY_DIR}/OrcHdfsFile.cc)
-endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
-
-#list(TRANSFORM SOURCE_FILES PREPEND ${LIBRARY_DIR}/)
-
-configure_file (
-  "${LIBRARY_INCLUDE}/orc/orc-config.hh.in"
-  "${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"
-  )
-
-add_library (orc ${SOURCE_FILES})
-
-target_include_directories (orc
-  PRIVATE
-  ${LIBRARY_INCLUDE}
-  ${LIBRARY_DIR}
-  #PUBLIC
-  ${CMAKE_CURRENT_BINARY_DIR}
-  PRIVATE
-  ${PROTOBUF_INCLUDE_DIR}
-  ${ZLIB_INCLUDE_DIR}
-  ${SNAPPY_INCLUDE_DIR}
-  ${LZ4_INCLUDE_DIR}
-  ${LIBHDFSPP_INCLUDE_DIR}
-  )
-
-target_link_libraries (orc PRIVATE
-  ${Protobuf_LIBRARY}
-  ${ZLIB_LIBRARIES}
-  ${SNAPPY_LIBRARY}
-  ${LZ4_LIBRARY}
-  ${LIBHDFSPP_LIBRARIES}
-  )
-
-#install(TARGETS orc DESTINATION lib)
-
-if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
-  add_definitions(-DBUILD_LIBHDFSPP)
-endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
diff --git a/contrib/simdjson b/contrib/simdjson
index e9be643db5c..60916318f76 160000
--- a/contrib/simdjson
+++ b/contrib/simdjson
@@ -1 +1 @@
-Subproject commit e9be643db5cf1c29a69bc80ee72d220124a9c50e
+Subproject commit 60916318f76432b5d04814c2af50d04ec15664ad
diff --git a/contrib/simdjson-cmake/CMakeLists.txt b/contrib/simdjson-cmake/CMakeLists.txt
index 747b85e6a94..faf0755cce5 100644
--- a/contrib/simdjson-cmake/CMakeLists.txt
+++ b/contrib/simdjson-cmake/CMakeLists.txt
@@ -12,4 +12,4 @@ set(SIMDJSON_SRC
 )
 
 add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
-target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}")
+target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 995d9298252..45d12f8ed93 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -77,9 +77,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
 elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     # Add compiler options only to c++ compiler
     function(add_cxx_compile_options option)
-        add_compile_options(
-                "$<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,CXX>:${option}>"
-        )
+        add_compile_options("$<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,CXX>:${option}>")
     endfunction()
     # Warn about boolean expression compared with an integer value different from true/false
     add_cxx_compile_options(-Wbool-compare)
@@ -106,15 +104,15 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     # Warn when the indentation of the code does not reflect the block structure
     add_cxx_compile_options(-Wmisleading-indentation)
     # Warn if a global function is defined without a previous declaration
-    add_cxx_compile_options(-Wmissing-declarations)
+    # add_cxx_compile_options(-Wmissing-declarations)
     # Warn if a user-supplied include directory does not exist
     # add_cxx_compile_options(-Wmissing-include-dirs)
     # Obvious
     add_cxx_compile_options(-Wnon-virtual-dtor)
     # Obvious
     add_cxx_compile_options(-Wno-return-local-addr)
-    # Obvious
-    add_cxx_compile_options(-Wnull-dereference)
+    # This warning is disabled due to false positives if compiled with libc++: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90037
+    #add_cxx_compile_options(-Wnull-dereference)
     # Obvious
     add_cxx_compile_options(-Wodr)
     # Obvious
@@ -291,7 +289,6 @@ macro (dbms_target_link_libraries)
 endmacro ()
 
 if (USE_EMBEDDED_COMPILER)
-    llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
     dbms_target_link_libraries (PRIVATE ${REQUIRED_LLVM_LIBRARIES})
     dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
 endif ()
diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp
index a138d6ab8f4..689f68f8d5e 100644
--- a/dbms/programs/performance-test/PerformanceTest.cpp
+++ b/dbms/programs/performance-test/PerformanceTest.cpp
@@ -2,6 +2,7 @@
 
 #include <Core/Types.h>
 #include <Common/CpuId.h>
+#include <Common/quoteString.h>
 #include <common/getMemoryAmount.h>
 #include <DataStreams/copyData.h>
 #include <DataStreams/NullBlockOutputStream.h>
@@ -142,7 +143,7 @@ bool PerformanceTest::checkPreconditions() const
 
             if (!exist)
             {
-                LOG_WARNING(log, "Table " << table_to_check << " doesn't exist");
+                LOG_WARNING(log, "Table " << backQuote(table_to_check) << " doesn't exist");
                 return false;
             }
         }
diff --git a/dbms/programs/server/PrometheusMetricsWriter.cpp b/dbms/programs/server/PrometheusMetricsWriter.cpp
index b45e66f7056..11782710104 100644
--- a/dbms/programs/server/PrometheusMetricsWriter.cpp
+++ b/dbms/programs/server/PrometheusMetricsWriter.cpp
@@ -1,5 +1,7 @@
 #include "PrometheusMetricsWriter.h"
 
+#include <algorithm>
+
 #include <IO/WriteHelpers.h>
 
 namespace
@@ -20,6 +22,11 @@ void writeOutLine(DB::WriteBuffer & wb, T && val, TArgs &&... args)
     writeOutLine(wb, std::forward<TArgs>(args)...);
 }
 
+void replaceInvalidChars(std::string & metric_name)
+{
+    std::replace(metric_name.begin(), metric_name.end(), '.', '_');
+}
+
 }
 
 
@@ -47,6 +54,7 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
             std::string metric_name{ProfileEvents::getName(static_cast<ProfileEvents::Event>(i))};
             std::string metric_doc{ProfileEvents::getDocumentation(static_cast<ProfileEvents::Event>(i))};
 
+            replaceInvalidChars(metric_name);
             std::string key{profile_events_prefix + metric_name};
 
             writeOutLine(wb, "# HELP", key, metric_doc);
@@ -64,6 +72,7 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
             std::string metric_name{CurrentMetrics::getName(static_cast<CurrentMetrics::Metric>(i))};
             std::string metric_doc{CurrentMetrics::getDocumentation(static_cast<CurrentMetrics::Metric>(i))};
 
+            replaceInvalidChars(metric_name);
             std::string key{current_metrics_prefix + metric_name};
 
             writeOutLine(wb, "# HELP", key, metric_doc);
@@ -78,6 +87,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
         for (const auto & name_value : async_metrics_values)
         {
             std::string key{asynchronous_metrics_prefix + name_value.first};
+
+            replaceInvalidChars(key);
             auto value = name_value.second;
 
             // TODO: add HELP section? asynchronous_metrics contains only key and value
diff --git a/dbms/programs/server/PrometheusMetricsWriter.h b/dbms/programs/server/PrometheusMetricsWriter.h
index 1b253b2b954..ba1f0cde61b 100644
--- a/dbms/programs/server/PrometheusMetricsWriter.h
+++ b/dbms/programs/server/PrometheusMetricsWriter.h
@@ -28,9 +28,9 @@ private:
     const bool send_metrics;
     const bool send_asynchronous_metrics;
 
-    static inline constexpr auto profile_events_prefix = "ClickHouseProfileEvents";
-    static inline constexpr auto current_metrics_prefix = "ClickHouseMetrics";
-    static inline constexpr auto asynchronous_metrics_prefix = "ClickHouseAsyncMetrics";
+    static inline constexpr auto profile_events_prefix = "ClickHouseProfileEvents_";
+    static inline constexpr auto current_metrics_prefix = "ClickHouseMetrics_";
+    static inline constexpr auto asynchronous_metrics_prefix = "ClickHouseAsyncMetrics_";
 };
 
 }
diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp
index ae1d995d363..77465daad27 100644
--- a/dbms/programs/server/Server.cpp
+++ b/dbms/programs/server/Server.cpp
@@ -18,6 +18,7 @@
 #include <common/config_common.h>
 #include <common/ErrorHandlers.h>
 #include <common/getMemoryAmount.h>
+#include <common/coverage.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/DNSResolver.h>
 #include <Common/CurrentMetrics.h>
@@ -938,6 +939,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 /// (they are effectively dangling objects, but they use global thread pool
                 ///  and global thread pool destructor will wait for threads, preventing server shutdown).
 
+                /// Dump coverage here, because std::atexit callback would not be called.
+                dumpCoverageReportIfPossible();
                 LOG_INFO(log, "Will shutdown forcefully.");
                 _exit(Application::EXIT_OK);
             }
diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp
index 9ea16254573..cb215eb0af8 100644
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@@ -110,7 +110,7 @@ void TCPHandler::runImpl()
     {
         if (!connection_context.isDatabaseExist(default_database))
         {
-            Exception e("Database " + default_database + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
+            Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
             LOG_ERROR(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
                 << ", Stack trace:\n\n" << e.getStackTrace().toString());
             sendException(e, connection_context.getSettingsRef().calculate_text_stack_trace);
diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml
index 725a2428411..c1479eaa528 100644
--- a/dbms/programs/server/config.xml
+++ b/dbms/programs/server/config.xml
@@ -318,7 +318,7 @@
     <!--
     <prometheus>
         <endpoint>/metrics</endpoint>
-        <port>8001</port>
+        <port>9363</port>
 
         <metrics>true</metrics>
         <events>true</events>
diff --git a/dbms/src/Access/AllowedClientHosts.cpp b/dbms/src/Access/AllowedClientHosts.cpp
index 4016d0ce00f..735411c5657 100644
--- a/dbms/src/Access/AllowedClientHosts.cpp
+++ b/dbms/src/Access/AllowedClientHosts.cpp
@@ -9,6 +9,7 @@
 #include <ext/scope_guard.h>
 #include <boost/range/algorithm/find.hpp>
 #include <boost/range/algorithm/find_first_of.hpp>
+#include <ifaddrs.h>
 
 
 namespace DB
@@ -30,10 +31,12 @@ namespace
         if (addr.family() == IPAddress::IPv6)
             return addr;
 
+        if (addr.isLoopback())
+            return IPAddress("::1");
+
         return IPAddress("::FFFF:" + addr.toString());
     }
 
-
     IPAddress maskToIPv6(const IPAddress & mask)
     {
         if (mask.family() == IPAddress::IPv6)
@@ -48,11 +51,11 @@ namespace
         IPAddress addr_v6 = toIPv6(address);
 
         /// Resolve by hand, because Poco don't use AI_ALL flag but we need it.
-        addrinfo * ai = nullptr;
+        addrinfo * ai_begin = nullptr;
         SCOPE_EXIT(
         {
-            if (ai)
-                freeaddrinfo(ai);
+            if (ai_begin)
+                freeaddrinfo(ai_begin);
         });
 
         addrinfo hints;
@@ -60,26 +63,26 @@ namespace
         hints.ai_family = AF_UNSPEC;
         hints.ai_flags |= AI_V4MAPPED | AI_ALL;
 
-        int ret = getaddrinfo(host.c_str(), nullptr, &hints, &ai);
-        if (0 != ret)
-            throw Exception("Cannot getaddrinfo: " + std::string(gai_strerror(ret)), ErrorCodes::DNS_ERROR);
+        int err = getaddrinfo(host.c_str(), nullptr, &hints, &ai_begin);
+        if (err)
+            throw Exception("Cannot getaddrinfo(" + host + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
 
-        for (; ai != nullptr; ai = ai->ai_next)
+        for (const addrinfo * ai = ai_begin; ai; ai = ai->ai_next)
         {
             if (ai->ai_addrlen && ai->ai_addr)
             {
-                if (ai->ai_family == AF_INET6)
+                if (ai->ai_family == AF_INET)
                 {
-                    if (addr_v6 == IPAddress(
-                        &reinterpret_cast<sockaddr_in6*>(ai->ai_addr)->sin6_addr, sizeof(in6_addr),
-                        reinterpret_cast<sockaddr_in6*>(ai->ai_addr)->sin6_scope_id))
+                    const auto & sin = *reinterpret_cast<const sockaddr_in *>(ai->ai_addr);
+                    if (addr_v6 == toIPv6(IPAddress(&sin.sin_addr, sizeof(sin.sin_addr))))
                     {
                         return true;
                     }
                 }
-                else if (ai->ai_family == AF_INET)
+                else if (ai->ai_family == AF_INET6)
                 {
-                    if (addr_v6 == toIPv6(IPAddress(&reinterpret_cast<sockaddr_in *>(ai->ai_addr)->sin_addr, sizeof(in_addr))))
+                    const auto & sin = *reinterpret_cast<const sockaddr_in6*>(ai->ai_addr);
+                    if (addr_v6 == IPAddress(&sin.sin6_addr, sizeof(sin.sin6_addr), sin.sin6_scope_id))
                     {
                         return true;
                     }
@@ -99,19 +102,61 @@ namespace
     }
 
 
+    std::vector<IPAddress> getAddressesOfLocalhostImpl()
+    {
+        std::vector<IPAddress> addresses;
+
+        ifaddrs * ifa_begin = nullptr;
+        SCOPE_EXIT({
+            if (ifa_begin)
+                freeifaddrs(ifa_begin);
+        });
+
+        int err = getifaddrs(&ifa_begin);
+        if (err)
+            return {IPAddress{"127.0.0.1"}, IPAddress{"::1"}};
+
+        for (const ifaddrs * ifa = ifa_begin; ifa; ifa = ifa->ifa_next)
+        {
+            if (!ifa->ifa_addr)
+                continue;
+            if (ifa->ifa_addr->sa_family == AF_INET)
+            {
+                const auto & sin = *reinterpret_cast<const sockaddr_in *>(ifa->ifa_addr);
+                addresses.push_back(toIPv6(IPAddress(&sin.sin_addr, sizeof(sin.sin_addr))));
+            }
+            else if (ifa->ifa_addr->sa_family == AF_INET6)
+            {
+                const auto & sin = *reinterpret_cast<const sockaddr_in6 *>(ifa->ifa_addr);
+                addresses.push_back(IPAddress(&sin.sin6_addr, sizeof(sin.sin6_addr), sin.sin6_scope_id));
+            }
+        }
+        return addresses;
+    }
+
+
+    /// Checks if a specified address points to the localhost.
+    bool isLocalAddress(const IPAddress & address)
+    {
+        static const std::vector<IPAddress> local_addresses = getAddressesOfLocalhostImpl();
+        return boost::range::find(local_addresses, address) != local_addresses.end();
+    }
+
+
     String getHostByAddressImpl(const IPAddress & address)
     {
         Poco::Net::SocketAddress sock_addr(address, 0);
 
         /// Resolve by hand, because Poco library doesn't have such functionality.
         char host[1024];
-        int gai_errno = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
-        if (0 != gai_errno)
-            throw Exception("Cannot getnameinfo: " + std::string(gai_strerror(gai_errno)), ErrorCodes::DNS_ERROR);
+        int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD);
+        if (err)
+            throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR);
 
         /// Check that PTR record is resolved back to client address
         if (!isAddressOfHost(address, host))
             throw Exception("Host " + String(host) + " isn't resolved back to " + address.toString(), ErrorCodes::DNS_ERROR);
+
         return host;
     }
 
@@ -158,6 +203,7 @@ AllowedClientHosts::AllowedClientHosts(const AllowedClientHosts & src)
 AllowedClientHosts & AllowedClientHosts::operator =(const AllowedClientHosts & src)
 {
     addresses = src.addresses;
+    loopback = src.loopback;
     subnets = src.subnets;
     host_names = src.host_names;
     host_regexps = src.host_regexps;
@@ -175,6 +221,7 @@ AllowedClientHosts::AllowedClientHosts(AllowedClientHosts && src)
 AllowedClientHosts & AllowedClientHosts::operator =(AllowedClientHosts && src)
 {
     addresses = std::move(src.addresses);
+    loopback = src.loopback;
     subnets = std::move(src.subnets);
     host_names = std::move(src.host_names);
     host_regexps = std::move(src.host_regexps);
@@ -186,6 +233,7 @@ AllowedClientHosts & AllowedClientHosts::operator =(AllowedClientHosts && src)
 void AllowedClientHosts::clear()
 {
     addresses.clear();
+    loopback = false;
     subnets.clear();
     host_names.clear();
     host_regexps.clear();
@@ -204,6 +252,8 @@ void AllowedClientHosts::addAddress(const IPAddress & address)
     IPAddress addr_v6 = toIPv6(address);
     if (boost::range::find(addresses, addr_v6) == addresses.end())
         addresses.push_back(addr_v6);
+    if (addr_v6.isLoopback())
+        loopback = true;
 }
 
 
@@ -291,30 +341,28 @@ bool AllowedClientHosts::containsAllAddresses() const
 }
 
 
-bool AllowedClientHosts::contains(const IPAddress & address) const
-{
-    return containsImpl(address, String(), nullptr);
-}
-
-
 void AllowedClientHosts::checkContains(const IPAddress & address, const String & user_name) const
 {
-    String error;
-    if (!containsImpl(address, user_name, &error))
-        throw Exception(error, ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
+    if (!contains(address))
+    {
+        if (user_name.empty())
+            throw Exception("It's not allowed to connect from address " + address.toString(), ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
+        else
+            throw Exception("User " + user_name + " is not allowed to connect from address " + address.toString(), ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
+    }
 }
 
 
-bool AllowedClientHosts::containsImpl(const IPAddress & address, const String & user_name, String * error) const
+bool AllowedClientHosts::contains(const IPAddress & address) const
 {
-    if (error)
-        error->clear();
-
     /// Check `ip_addresses`.
     IPAddress addr_v6 = toIPv6(address);
     if (boost::range::find(addresses, addr_v6) != addresses.end())
         return true;
 
+    if (loopback && isLocalAddress(addr_v6))
+        return true;
+
     /// Check `ip_subnets`.
     for (const auto & subnet : subnets)
         if ((addr_v6 & subnet.mask) == subnet.prefix)
@@ -325,14 +373,13 @@ bool AllowedClientHosts::containsImpl(const IPAddress & address, const String &
     {
         try
         {
-            if (isAddressOfHost(address, host_name))
+            if (isAddressOfHost(addr_v6, host_name))
                 return true;
         }
-        catch (Exception & e)
+        catch (const Exception & e)
         {
             if (e.code() != ErrorCodes::DNS_ERROR)
-                e.rethrow();
-
+                throw;
             /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
             LOG_WARNING(
                 &Logger::get("AddressPatterns"),
@@ -342,38 +389,31 @@ bool AllowedClientHosts::containsImpl(const IPAddress & address, const String &
     }
 
     /// Check `host_regexps`.
-    if (!host_regexps.empty())
+    try
     {
-        compileRegexps();
-        try
+        String resolved_host = getHostByAddress(addr_v6);
+        if (!resolved_host.empty())
         {
-            String resolved_host = getHostByAddress(address);
+            compileRegexps();
             for (const auto & compiled_regexp : compiled_host_regexps)
             {
-                if (compiled_regexp && compiled_regexp->match(resolved_host))
+                Poco::RegularExpression::Match match;
+                if (compiled_regexp && compiled_regexp->match(resolved_host, match))
                     return true;
             }
         }
-        catch (Exception & e)
-        {
-            if (e.code() != ErrorCodes::DNS_ERROR)
-                e.rethrow();
-
-            /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
-            LOG_WARNING(
-                &Logger::get("AddressPatterns"),
-                "Failed to check if the allowed client hosts contain address " << address.toString() << ". " << e.displayText()
-                                                                               << ", code = " << e.code());
-        }
     }
-
-    if (error)
+    catch (const Exception & e)
     {
-        if (user_name.empty())
-            *error = "It's not allowed to connect from address " + address.toString();
-        else
-            *error = "User " + user_name + " is not allowed to connect from address " + address.toString();
+        if (e.code() != ErrorCodes::DNS_ERROR)
+            throw;
+        /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
+        LOG_WARNING(
+            &Logger::get("AddressPatterns"),
+            "Failed to check if the allowed client hosts contain address " << address.toString() << ". " << e.displayText()
+                                                                         << ", code = " << e.code());
     }
+
     return false;
 }
 
diff --git a/dbms/src/Access/AllowedClientHosts.h b/dbms/src/Access/AllowedClientHosts.h
index fea797c2aa4..17f8be878a1 100644
--- a/dbms/src/Access/AllowedClientHosts.h
+++ b/dbms/src/Access/AllowedClientHosts.h
@@ -91,10 +91,10 @@ public:
     friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); }
 
 private:
-    bool containsImpl(const IPAddress & address, const String & user_name, String * error) const;
     void compileRegexps() const;
 
     std::vector<IPAddress> addresses;
+    bool loopback = false;
     std::vector<IPSubnet> subnets;
     std::vector<String> host_names;
     std::vector<String> host_regexps;
diff --git a/dbms/src/Common/Dwarf.cpp b/dbms/src/Common/Dwarf.cpp
index 87f91e1d732..38606f8a8a8 100644
--- a/dbms/src/Common/Dwarf.cpp
+++ b/dbms/src/Common/Dwarf.cpp
@@ -454,7 +454,7 @@ Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t
         case DW_FORM_flag:
             return uint64_t(read<uint8_t>(sp));
         case DW_FORM_flag_present:
-            return 1;
+            return uint64_t(1);
         case DW_FORM_sec_offset: [[fallthrough]];
         case DW_FORM_ref_addr:
             return readOffset(sp, is64Bit);
diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 4298232ac9c..cb89a9c4e3d 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -96,7 +96,6 @@ struct Settings : public SettingsCollection<Settings>
     \
     M(SettingBool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
     M(SettingBool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
-    M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.", 0) \
     M(SettingUInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
     M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \
     M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
@@ -123,8 +122,6 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingUInt64, merge_tree_max_rows_to_use_cache, (128 * 8192), "The maximum number of rows per request, to use the cache of uncompressed data. If the request is large, the cache is not used. (For large queries not to flush out the cache.)", 0) \
     M(SettingUInt64, merge_tree_max_bytes_to_use_cache, (192 * 10 * 1024 * 1024), "The maximum number of rows per request, to use the cache of uncompressed data. If the request is large, the cache is not used. (For large queries not to flush out the cache.)", 0) \
     \
-    M(SettingBool, merge_tree_uniform_read_distribution, true, "Distribute read from MergeTree over threads evenly, ensuring stable average execution time of each thread within one read operation.", 0) \
-    \
     M(SettingUInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
     \
     M(SettingUInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
@@ -393,7 +390,10 @@ struct Settings : public SettingsCollection<Settings>
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
     M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13", 0) \
-    M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13", 0) \
+    M(SettingBool, compile, false, "Obsolete setting, does nothing. Will be removed after 2020-03-13", 0) \
+    M(SettingUInt64, min_count_to_compile, 0, "Obsolete setting, does nothing. Will be removed after 2020-03-13", 0) \
+    M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
+
 
     DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
 
diff --git a/dbms/src/Core/SortCursor.h b/dbms/src/Core/SortCursor.h
index c898ee71b8f..5b4db43024f 100644
--- a/dbms/src/Core/SortCursor.h
+++ b/dbms/src/Core/SortCursor.h
@@ -1,5 +1,9 @@
 #pragma once
 
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
 #include <Core/SortDescription.h>
@@ -98,9 +102,12 @@ struct SortCursorImpl
 
     bool isFirst() const { return pos == 0; }
     bool isLast() const { return pos + 1 >= rows; }
+    bool isValid() const { return pos < rows; }
     void next() { ++pos; }
 };
 
+using SortCursorImpls = std::vector<SortCursorImpl>;
+
 
 /// For easy copying.
 struct SortCursor
@@ -203,4 +210,102 @@ struct SortCursorWithCollation
     }
 };
 
+
+/** Allows fetching data from multiple sort cursors in sorted order (merging sorted data streams).
+  */
+template <typename Cursor>
+class SortingHeap
+{
+public:
+    SortingHeap() = default;
+
+    template <typename Cursors>
+    SortingHeap(Cursors & cursors)
+    {
+        size_t size = cursors.size();
+        queue.reserve(size);
+        for (size_t i = 0; i < size; ++i)
+            queue.emplace_back(&cursors[i]);
+        std::make_heap(queue.begin(), queue.end());
+    }
+
+    bool isValid() const { return !queue.empty(); }
+
+    Cursor & current() { return queue.front(); }
+
+    void next()
+    {
+        assert(isValid());
+
+        if (!current()->isLast())
+        {
+            current()->next();
+            updateTop();
+        }
+        else
+            removeTop();
+    }
+
+private:
+    using Container = std::vector<Cursor>;
+    Container queue;
+
+    /// This is an adapted version of the function __sift_down from libc++.
+    /// Why can't we simply use std::priority_queue?
+    /// - because it doesn't support updating the top element and requires pop and push instead.
+    void updateTop()
+    {
+        size_t size = queue.size();
+        if (size < 2)
+            return;
+
+        size_t child_idx = 1;
+        auto begin = queue.begin();
+        auto child_it = begin + 1;
+
+        /// Right child exists and is greater than left child.
+        if (size > 2 && *child_it < *(child_it + 1))
+        {
+            ++child_it;
+            ++child_idx;
+        }
+
+        /// Check if we are in order.
+        if (*child_it < *begin)
+            return;
+
+        auto curr_it = begin;
+        auto top(std::move(*begin));
+        do
+        {
+            /// We are not in heap-order, swap the parent with its largest child.
+            *curr_it = std::move(*child_it);
+            curr_it = child_it;
+
+            if ((size - 2) / 2 < child_idx)
+                break;
+
+            // recompute the child based off of the updated parent
+            child_idx = 2 * child_idx + 1;
+            child_it = begin + child_idx;
+
+            if ((child_idx + 1) < size && *child_it < *(child_it + 1))
+            {
+                /// Right child exists and is greater than left child.
+                ++child_it;
+                ++child_idx;
+            }
+
+            /// Check if we are in order.
+        } while (!(*child_it < top));
+        *curr_it = std::move(top);
+    }
+
+    void removeTop()
+    {
+        std::pop_heap(queue.begin(), queue.end());
+        queue.pop_back();
+    }
+};
+
 }
diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
index 9f6f8173cde..1c50316fc3f 100644
--- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
@@ -1,4 +1,3 @@
-#include <Poco/Version.h>
 #include <DataStreams/MergeSortingBlockInputStream.h>
 #include <DataStreams/MergingSortedBlockInputStream.h>
 #include <DataStreams/NativeBlockOutputStream.h>
@@ -152,15 +151,9 @@ MergeSortingBlocksBlockInputStream::MergeSortingBlocksBlockInputStream(
     blocks.swap(nonempty_blocks);
 
     if (!has_collation)
-    {
-        for (size_t i = 0; i < cursors.size(); ++i)
-            queue_without_collation.push(SortCursor(&cursors[i]));
-    }
+        queue_without_collation = SortingHeap<SortCursor>(cursors);
     else
-    {
-        for (size_t i = 0; i < cursors.size(); ++i)
-            queue_with_collation.push(SortCursorWithCollation(&cursors[i]));
-    }
+        queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
 }
 
 
@@ -177,52 +170,50 @@ Block MergeSortingBlocksBlockInputStream::readImpl()
     }
 
     return !has_collation
-        ? mergeImpl<SortCursor>(queue_without_collation)
-        : mergeImpl<SortCursorWithCollation>(queue_with_collation);
+        ? mergeImpl(queue_without_collation)
+        : mergeImpl(queue_with_collation);
 }
 
 
-template <typename TSortCursor>
-Block MergeSortingBlocksBlockInputStream::mergeImpl(std::priority_queue<TSortCursor> & queue)
+template <typename TSortingHeap>
+Block MergeSortingBlocksBlockInputStream::mergeImpl(TSortingHeap & queue)
 {
-    size_t num_columns = blocks[0].columns();
+    size_t num_columns = header.columns();
 
-    MutableColumns merged_columns = blocks[0].cloneEmptyColumns();
+    MutableColumns merged_columns = header.cloneEmptyColumns();
     /// TODO: reserve (in each column)
 
     /// Take rows from queue in right order and push to 'merged'.
     size_t merged_rows = 0;
-    while (!queue.empty())
+    while (queue.isValid())
     {
-        TSortCursor current = queue.top();
-        queue.pop();
+        auto current = queue.current();
 
+        /// Append a row from queue.
         for (size_t i = 0; i < num_columns; ++i)
             merged_columns[i]->insertFrom(*current->all_columns[i], current->pos);
 
-        if (!current->isLast())
-        {
-            current->next();
-            queue.push(current);
-        }
-
         ++total_merged_rows;
+        ++merged_rows;
+
+        /// We don't need more rows because the limit has been reached.
         if (limit && total_merged_rows == limit)
         {
-            auto res = blocks[0].cloneWithColumns(std::move(merged_columns));
             blocks.clear();
-            return res;
+            break;
         }
 
-        ++merged_rows;
+        queue.next();
+
+        /// It's enough for the current output block, but merging will continue on the next call.
         if (merged_rows == max_merged_block_size)
-            return blocks[0].cloneWithColumns(std::move(merged_columns));
+            break;
     }
 
     if (merged_rows == 0)
         return {};
 
-    return blocks[0].cloneWithColumns(std::move(merged_columns));
+    return header.cloneWithColumns(std::move(merged_columns));
 }
 
 
diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h
index a8b8e8cfd3b..9492bdb074b 100644
--- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h
+++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include <queue>
-
 #include <common/logger_useful.h>
 
 #include <Common/filesystemHelpers.h>
@@ -56,19 +54,18 @@ private:
     UInt64 limit;
     size_t total_merged_rows = 0;
 
-    using CursorImpls = std::vector<SortCursorImpl>;
-    CursorImpls cursors;
+    SortCursorImpls cursors;
 
     bool has_collation = false;
 
-    std::priority_queue<SortCursor> queue_without_collation;
-    std::priority_queue<SortCursorWithCollation> queue_with_collation;
+    SortingHeap<SortCursor> queue_without_collation;
+    SortingHeap<SortCursorWithCollation> queue_with_collation;
 
     /** Two different cursors are supported - with and without Collation.
      *  Templates are used (instead of virtual functions in SortCursor) for zero-overhead.
      */
-    template <typename TSortCursor>
-    Block mergeImpl(std::priority_queue<TSortCursor> & queue);
+    template <typename TSortingHeap>
+    Block mergeImpl(TSortingHeap & queue);
 };
 
 
diff --git a/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp
index 36326a6210f..2ae1c335387 100644
--- a/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp
+++ b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp
@@ -46,7 +46,7 @@ struct TypesTestCase
     const char * expected_type = nullptr;
 };
 
-static std::ostream & operator<<(std::ostream & ostr, const TypesTestCase & test_case)
+std::ostream & operator<<(std::ostream & ostr, const TypesTestCase & test_case)
 {
     ostr << "TypesTestCase{\"" << test_case.from_types << "\", ";
     if (test_case.expected_type)
diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp
index d39ed5ec193..9299a75ad37 100644
--- a/dbms/src/Databases/DatabaseDictionary.cpp
+++ b/dbms/src/Databases/DatabaseDictionary.cpp
@@ -39,7 +39,7 @@ Tables DatabaseDictionary::listTables(const Context & context, const FilterByNam
     if (filter_by_name)
     {
         /// If `filter_by_name` is set, we iterate through all dictionaries with such names. That's why we need to load all of them.
-        context.getExternalDictionariesLoader().load(filter_by_name, load_results);
+        load_results = context.getExternalDictionariesLoader().tryLoad<ExternalLoader::LoadResults>(filter_by_name);
     }
     else
     {
@@ -47,12 +47,12 @@ Tables DatabaseDictionary::listTables(const Context & context, const FilterByNam
         load_results = context.getExternalDictionariesLoader().getCurrentLoadResults();
     }
 
-    for (const auto & [object_name, info]: load_results)
+    for (const auto & load_result: load_results)
     {
         /// Load tables only from XML dictionaries, don't touch other
-        if (info.object != nullptr && info.repository_name.empty())
+        if (load_result.object && load_result.repository_name.empty())
         {
-            auto dict_ptr = std::static_pointer_cast<const IDictionaryBase>(info.object);
+            auto dict_ptr = std::static_pointer_cast<const IDictionaryBase>(load_result.object);
             auto dict_name = dict_ptr->getName();
             const DictionaryStructure & dictionary_structure = dict_ptr->getStructure();
             auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure);
diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp
index b771c35687b..c5359e8f734 100644
--- a/dbms/src/Databases/DatabaseOnDisk.cpp
+++ b/dbms/src/Databases/DatabaseOnDisk.cpp
@@ -286,7 +286,9 @@ void DatabaseOnDisk::createDictionary(
     String full_name = database.getDatabaseName() + "." + dictionary_name;
     auto & external_loader = const_cast<ExternalDictionariesLoader &>(context.getExternalDictionariesLoader());
     if (external_loader.getCurrentStatus(full_name) != ExternalLoader::Status::NOT_EXIST)
-        throw Exception("Dictionary " + backQuote(full_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS);
+        throw Exception(
+            "Dictionary " + backQuote(database.getDatabaseName()) + "." + backQuote(dictionary_name) + " already exists.",
+            ErrorCodes::DICTIONARY_ALREADY_EXISTS);
 
     if (database.isTableExist(context, dictionary_name))
         throw Exception("Table " + backQuote(database.getDatabaseName()) + "." + backQuote(dictionary_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
@@ -326,9 +328,9 @@ void DatabaseOnDisk::createDictionary(
     bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true);
     if (!lazy_load)
     {
-        /// loadStrict() is called here to force loading the dictionary, wait until the loading is finished,
+        /// load() is called here to force loading the dictionary, wait until the loading is finished,
         /// and throw an exception if the loading is failed.
-        external_loader.loadStrict(full_name);
+        external_loader.load(full_name);
     }
 
     database.attachDictionary(dictionary_name, context);
diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp
index 3322d9d56bb..160d4f38f1c 100644
--- a/dbms/src/Databases/DatabasesCommon.cpp
+++ b/dbms/src/Databases/DatabasesCommon.cpp
@@ -153,7 +153,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name)
 
         auto it = tables.find(table_name);
         if (it == tables.end())
-            throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
+            throw Exception("Table " + backQuote(name) + "." + backQuote(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
         res = it->second;
         tables.erase(it);
     }
diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
index f5aaf14bf6b..945348d883b 100644
--- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp
+++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
@@ -142,7 +142,7 @@ LibraryDictionarySource::LibraryDictionarySource(
 
     if (!Poco::File(path).exists())
         throw Exception(
-            "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist",
+            "LibraryDictionarySource: Can't load library " + Poco::File(path).path() + ": file doesn't exist",
             ErrorCodes::FILE_DOESNT_EXIST);
 
     description.init(sample_block);
diff --git a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp
index ce96cfc3e99..676cdcf5c34 100644
--- a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp
+++ b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp
@@ -64,8 +64,11 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration)
     EXPECT_EQ(config->getInt("dictionary.lifetime.max"), 10);
 
     /// range
-    EXPECT_EQ(config->getString("dictionary.structure.range_min"), "second_column");
-    EXPECT_EQ(config->getString("dictionary.structure.range_max"), "third_column");
+    EXPECT_EQ(config->getString("dictionary.structure.range_min.name"), "second_column");
+    EXPECT_EQ(config->getString("dictionary.structure.range_max.name"), "third_column");
+    EXPECT_EQ(config->getString("dictionary.structure.range_min.type"), "UInt8");
+    EXPECT_EQ(config->getString("dictionary.structure.range_max.type"), "UInt8");
+
 
     /// source
     EXPECT_EQ(config->getString("dictionary.source.clickhouse.host"), "localhost");
diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt
index 6db48932093..9eed1061349 100644
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@@ -62,7 +62,6 @@ if (ENABLE_TESTS)
 endif ()
 
 if (USE_EMBEDDED_COMPILER)
-    llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
     target_link_libraries(clickhouse_functions PRIVATE ${REQUIRED_LLVM_LIBRARIES})
     target_include_directories(clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
 endif ()
diff --git a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h
index a3fe968a157..c264b2fb0ba 100644
--- a/dbms/src/Functions/FunctionsBitmap.h
+++ b/dbms/src/Functions/FunctionsBitmap.h
@@ -991,24 +991,37 @@ private:
     void executeBitmapData(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
     {
         const ColumnAggregateFunction * columns[2];
+        bool is_column_const[2];
         for (size_t i = 0; i < 2; ++i)
         {
             if (auto argument_column_const = typeid_cast<const ColumnConst *>(block.getByPosition(arguments[i]).column.get()))
+            {
                 columns[i] = typeid_cast<const ColumnAggregateFunction *>(argument_column_const->getDataColumnPtr().get());
+                is_column_const[i] = true;
+            }
             else
+            {
                 columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
+                is_column_const[i] = false;
+            }
         }
 
         auto col_to = ColumnAggregateFunction::create(columns[0]->getAggregateFunction());
 
         col_to->reserve(input_rows_count);
 
+        const PaddedPODArray<AggregateDataPtr> & container0 = columns[0]->getData();
+        const PaddedPODArray<AggregateDataPtr> & container1 = columns[1]->getData();
+
         for (size_t i = 0; i < input_rows_count; ++i)
         {
-            col_to->insertFrom(columns[0]->getData()[i]);
+            const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i];
+            const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i];
+
+            col_to->insertFrom(data_ptr_0);
             AggregateFunctionGroupBitmapData<T> & bitmap_data_1 = *reinterpret_cast<AggregateFunctionGroupBitmapData<T> *>(col_to->getData()[i]);
             const AggregateFunctionGroupBitmapData<T> & bitmap_data_2
-                = *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(columns[1]->getData()[i]);
+                = *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(data_ptr_1);
             Impl<T>::apply(bitmap_data_1, bitmap_data_2);
         }
         block.getByPosition(result).column = std::move(col_to);
diff --git a/dbms/src/Functions/now64.cpp b/dbms/src/Functions/now64.cpp
index bd0eae0f077..f1a8d444019 100644
--- a/dbms/src/Functions/now64.cpp
+++ b/dbms/src/Functions/now64.cpp
@@ -16,13 +16,16 @@ namespace ErrorCodes
 {
     extern const int ILLEGAL_COLUMN;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int CANNOT_CLOCK_GETTIME;
 }
 
 static Field nowSubsecond(UInt32 scale)
 {
-    const Int32 fractional_scale = 9;
-    timespec spec;
-    clock_gettime(CLOCK_REALTIME, &spec);
+    static constexpr Int32 fractional_scale = 9;
+
+    timespec spec{};
+    if (clock_gettime(CLOCK_REALTIME, &spec))
+        throwFromErrno("Cannot clock_gettime.", ErrorCodes::CANNOT_CLOCK_GETTIME);
 
     DecimalUtils::DecimalComponents<DateTime64::NativeType> components{spec.tv_sec, spec.tv_nsec};
 
diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h
index f015dcddfbb..9503e2c28e9 100644
--- a/dbms/src/Interpreters/AnalyzedJoin.h
+++ b/dbms/src/Interpreters/AnalyzedJoin.h
@@ -119,6 +119,9 @@ public:
     const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; }
     const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; }
 
+    /// StorageJoin overrides key names (because of different name qualification)
+    void setRightKeys(const Names & keys) { key_names_right = keys; }
+
     static bool sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y);
     friend JoinPtr makeJoin(std::shared_ptr<AnalyzedJoin> table_join, const Block & right_sample_block);
 };
diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp
index fad1a6af043..661325b22c2 100644
--- a/dbms/src/Interpreters/AsynchronousMetrics.cpp
+++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp
@@ -72,9 +72,6 @@ void AsynchronousMetrics::run()
 
     while (true)
     {
-        if (wait_cond.wait_until(lock, get_next_minute(), [this] { return quit; }))
-            break;
-
         try
         {
             update();
@@ -83,6 +80,9 @@ void AsynchronousMetrics::run()
         {
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
+
+        if (wait_cond.wait_until(lock, get_next_minute(), [this] { return quit; }))
+            break;
     }
 }
 
diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index 12ba5850750..2c75bd821fe 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -83,7 +83,8 @@ Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, cons
     default_database = config.getString(config_prefix + ".default_database", "");
     secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
     compression = config.getBool(config_prefix + ".compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable;
-    is_local = isLocal(config.getInt("tcp_port", 0));
+    const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
+    is_local = isLocal(config.getInt(port_type, 0));
 }
 
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index afa4fca79f8..861a6b5ff03 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -645,7 +645,8 @@ void DDLWorker::processTask(DDLTask & task, const ZooKeeperPtr & zookeeper)
         }
         catch (...)
         {
-            task.execution_status = ExecutionStatus::fromCurrentException("An error occured before execution");
+            tryLogCurrentException(log, "An error occurred before execution of DDL task: ");
+            task.execution_status = ExecutionStatus::fromCurrentException("An error occurred before execution");
         }
 
         /// We need to distinguish ZK errors occured before and after query executing
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 37239e0bd11..59dff858cf0 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -230,6 +230,16 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
 }
 
 
+NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const
+{
+    const auto & joined_columns = analyzedJoin().columnsAddedByJoin();
+    NamesAndTypesList all_columns = sourceColumns(); /// Start from a copy of the source columns, then append the rest.
+    all_columns.insert(all_columns.end(), array_join_columns.begin(), array_join_columns.end());
+    all_columns.insert(all_columns.end(), joined_columns.begin(), joined_columns.end());
+    return all_columns;
+}
+
+
 void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
 {
     auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
@@ -313,12 +323,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
             }
             else
             {
-                NamesAndTypesList temp_columns = sourceColumns();
-                temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
-                temp_columns.insert(temp_columns.end(),
-                                    analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
-
-                ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
+                ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context);
                 getRootActions(left_in_operand, true, temp_actions);
 
                 Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
@@ -451,10 +456,10 @@ bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, b
     return true;
 }
 
-static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, const Context & context)
+static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, std::shared_ptr<AnalyzedJoin> analyzed_join,
+                                 const Context & context)
 {
     const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>();
-    auto & join_params = join_element.table_join->as<ASTTableJoin &>();
 
     /// TODO This syntax does not support specifying a database name.
     if (table_to_join.database_and_table_name)
@@ -465,14 +470,8 @@ static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_elem
         if (table)
         {
             auto * storage_join = dynamic_cast<StorageJoin *>(table.get());
-
             if (storage_join)
-            {
-                storage_join->assertCompatible(join_params.kind, join_params.strictness);
-                /// TODO Check the set of keys.
-
-                return storage_join->getJoin();
-            }
+                return storage_join->getJoin(analyzed_join);
         }
     }
 
@@ -497,7 +496,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer
 
     /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping).
     if (!subquery_for_join.join)
-        subquery_for_join.join = tryGetStorageJoin(join_element, context);
+        subquery_for_join.join = tryGetStorageJoin(join_element, syntax->analyzed_join, context);
 
     if (!subquery_for_join.join)
     {
@@ -724,7 +723,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
         step.required_output.push_back(child->getColumnName());
 }
 
-bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
+bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order)
 {
     const auto * select_query = getSelectQuery();
 
@@ -745,6 +744,16 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain
         step.required_output.push_back(order_expression->getColumnName());
     }
 
+    if (optimize_read_in_order)
+    {
+        auto all_columns = sourceWithJoinedColumns();
+        for (auto & child : select_query->orderBy()->children)
+        {
+            order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
+            getRootActions(child, only_types, order_by_elements_actions.back());
+        }
+    }
+
     return true;
 }
 
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index 8cd7b754632..2de2d2bfd18 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -18,6 +18,7 @@ class Context;
 struct ExpressionActionsChain;
 class ExpressionActions;
 using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
 
 struct ASTTableJoin;
 class IJoin;
@@ -46,6 +47,9 @@ struct ExpressionAnalyzerData
 
     /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
     Tables external_tables;
+
+    /// Actions by every element of ORDER BY
+    ManyExpressionActions order_by_elements_actions;
 };
 
 
@@ -119,6 +123,7 @@ protected:
     const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
     const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
     const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
+    NamesAndTypesList sourceWithJoinedColumns() const;
 
     /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
     void initGlobalSubqueriesAndExternalTables(bool do_global);
@@ -169,6 +174,8 @@ public:
 
     const PreparedSets & getPreparedSets() const { return prepared_sets; }
 
+    const ManyExpressionActions & getOrderByActions() const { return order_by_elements_actions; }
+
     /// Tables that will need to be sent to remote servers for distributed query processing.
     const Tables & getExternalTables() const { return external_tables; }
 
@@ -201,7 +208,7 @@ public:
     /// After aggregation:
     bool appendHaving(ExpressionActionsChain & chain, bool only_types);
     void appendSelect(ExpressionActionsChain & chain, bool only_types);
-    bool appendOrderBy(ExpressionActionsChain & chain, bool only_types);
+    bool appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order);
     bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
     /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
     void appendProjectResult(ExpressionActionsChain & chain) const;
diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp
index a8459ecb6c9..944e6cc78f2 100644
--- a/dbms/src/Interpreters/ExpressionJIT.cpp
+++ b/dbms/src/Interpreters/ExpressionJIT.cpp
@@ -22,7 +22,6 @@
 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
 
 #include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Config/llvm-config.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/DataLayout.h>
 #include <llvm/IR/DerivedTypes.h>
@@ -32,6 +31,7 @@
 #include <llvm/IR/Mangler.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Type.h>
+#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/ExecutionEngine/JITSymbol.h>
 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
@@ -48,6 +48,10 @@
 
 #pragma GCC diagnostic pop
 
+/// 'LegacyRTDyldObjectLinkingLayer' is deprecated: ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please use ORCv2
+/// 'LegacyIRCompileLayer' is deprecated: ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please use the ORCv2 IRCompileLayer instead
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
 
 namespace ProfileEvents
 {
@@ -124,105 +128,68 @@ static llvm::TargetMachine * getNativeMachine()
     llvm::TargetOptions options;
     return target->createTargetMachine(
         triple, cpu, features.getString(), options, llvm::None,
-#if LLVM_VERSION_MAJOR >= 6
         llvm::None, llvm::CodeGenOpt::Default, /*jit=*/true
-#else
-        llvm::CodeModel::Default, llvm::CodeGenOpt::Default
-#endif
     );
 }
 
-#if LLVM_VERSION_MAJOR >= 7
-static auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr)
+
+struct SymbolResolver : public llvm::orc::SymbolResolver
 {
-#if USE_INTERNAL_LLVM_LIBRARY && LLVM_VERSION_PATCH == 0
-    // REMOVE AFTER contrib/llvm upgrade
-    auto flags = [&](llvm::orc::SymbolFlagsMap & flags_internal, const llvm::orc::SymbolNameSet & symbols)
+    llvm::LegacyJITSymbolResolver & impl;
+
+    SymbolResolver(llvm::LegacyJITSymbolResolver & impl_) : impl(impl_) {}
+
+    llvm::orc::SymbolNameSet getResponsibilitySet(const llvm::orc::SymbolNameSet & symbols) final
+    {
+        return symbols;
+    }
+
+    llvm::orc::SymbolNameSet lookup(std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> query, llvm::orc::SymbolNameSet symbols) final
     {
         llvm::orc::SymbolNameSet missing;
         for (const auto & symbol : symbols)
         {
-            auto resolved = jsr.lookupFlags({*symbol});
-            if (resolved && resolved->size())
-                flags_internal.emplace(symbol, resolved->begin()->second);
-            else
-                missing.emplace(symbol);
+            bool has_resolved = false;
+            impl.lookup({*symbol}, [&](llvm::Expected<llvm::JITSymbolResolver::LookupResult> resolved)
+            {
+                if (resolved && resolved->size())
+                {
+                    query->notifySymbolMetRequiredState(symbol, resolved->begin()->second);
+                    has_resolved = true;
+                }
+            });
+
+            if (!has_resolved)
+                missing.insert(symbol);
         }
         return missing;
-    };
-#else
-    // Actually this should work for 7.0.0 but now we have OLDER 7.0.0svn in contrib
-    auto flags = [&](const llvm::orc::SymbolNameSet & symbols)
-    {
-        llvm::orc::SymbolFlagsMap flags_map;
-        for (const auto & symbol : symbols)
-        {
-            auto resolved = jsr.lookupFlags({*symbol});
-            if (resolved && resolved->size())
-                flags_map.emplace(symbol, resolved->begin()->second);
-        }
-        return flags_map;
-    };
-#endif
+    }
+};
 
-    auto symbols = [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> query, llvm::orc::SymbolNameSet symbols_set)
-    {
-        llvm::orc::SymbolNameSet missing;
-        for (const auto & symbol : symbols_set)
-        {
-            auto resolved = jsr.lookup({*symbol});
-            if (resolved && resolved->size())
-                query->resolve(symbol, resolved->begin()->second);
-            else
-                missing.emplace(symbol);
-        }
-        return missing;
-    };
-    return llvm::orc::createSymbolResolver(flags, symbols);
-}
-#endif
-
-#if LLVM_VERSION_MAJOR >= 7
-using ModulePtr = std::unique_ptr<llvm::Module>;
-#else
-using ModulePtr = std::shared_ptr<llvm::Module>;
-#endif
 
 struct LLVMContext
 {
-    std::shared_ptr<llvm::LLVMContext> context;
-#if LLVM_VERSION_MAJOR >= 7
+    std::shared_ptr<llvm::LLVMContext> context {std::make_shared<llvm::LLVMContext>()};
+    std::unique_ptr<llvm::Module> module {std::make_unique<llvm::Module>("jit", *context)};
+    std::unique_ptr<llvm::TargetMachine> machine {getNativeMachine()};
+    llvm::DataLayout layout {machine->createDataLayout()};
+    llvm::IRBuilder<> builder {*context};
+
     llvm::orc::ExecutionSession execution_session;
-#endif
-    ModulePtr module;
-    std::unique_ptr<llvm::TargetMachine> machine;
+
     std::shared_ptr<llvm::SectionMemoryManager> memory_manager;
-    llvm::orc::RTDyldObjectLinkingLayer object_layer;
-    llvm::orc::IRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer;
-    llvm::DataLayout layout;
-    llvm::IRBuilder<> builder;
+    llvm::orc::LegacyRTDyldObjectLinkingLayer object_layer;
+    llvm::orc::LegacyIRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer;
+
     std::unordered_map<std::string, void *> symbols;
 
     LLVMContext()
-        : context(std::make_shared<llvm::LLVMContext>())
-#if LLVM_VERSION_MAJOR >= 7
-        , module(std::make_unique<llvm::Module>("jit", *context))
-#else
-        , module(std::make_shared<llvm::Module>("jit", *context))
-#endif
-        , machine(getNativeMachine())
-        , memory_manager(std::make_shared<llvm::SectionMemoryManager>())
-#if LLVM_VERSION_MAJOR >= 7
+        : memory_manager(std::make_shared<llvm::SectionMemoryManager>())
         , object_layer(execution_session, [this](llvm::orc::VModuleKey)
         {
-            return llvm::orc::RTDyldObjectLinkingLayer::Resources{memory_manager, wrapJITSymbolResolver(*memory_manager)};
+            return llvm::orc::LegacyRTDyldObjectLinkingLayer::Resources{memory_manager, std::make_shared<SymbolResolver>(*memory_manager)};
         })
-#else
-        , object_layer([this]() { return memory_manager; })
-#endif
         , compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine))
-        , layout(machine->createDataLayout())
-        , builder(*context)
     {
         module->setDataLayout(layout);
         module->setTargetTriple(machine->getTargetTriple().getTriple());
@@ -258,14 +225,9 @@ struct LLVMContext
         for (const auto & function : *module)
             functions.emplace_back(function.getName());
 
-#if LLVM_VERSION_MAJOR >= 7
         llvm::orc::VModuleKey module_key = execution_session.allocateVModule();
         if (compile_layer.addModule(module_key, std::move(module)))
             throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
-#else
-        if (!compile_layer.addModule(module, memory_manager))
-            throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
-#endif
 
         for (const auto & name : functions)
         {
@@ -284,6 +246,13 @@ struct LLVMContext
     }
 };
 
+/// Tries the types in Ts in order: where typeid_cast succeeds, calls f on the typed column; stops at the first true result.
+template <typename... Ts, typename F>
+static bool castToEither(IColumn * column, F && f)
+{
+    return ((typeid_cast<Ts *>(column) ? f(*typeid_cast<Ts *>(column)) : false) || ...);
+}
+
 class LLVMExecutableFunction : public IExecutableFunctionImpl
 {
     std::string name;
@@ -307,9 +276,16 @@ public:
 
     void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t block_size) override
     {
-        auto col_res = block.getByPosition(result).type->createColumn()->cloneResized(block_size);
+        auto col_res = block.getByPosition(result).type->createColumn();
+
         if (block_size)
         {
+            if (!castToEither<
+                ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
+                ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
+                ColumnFloat32, ColumnFloat64>(col_res.get(), [block_size](auto & col) { col.getData().resize(block_size); return true; }))
+                throw Exception("Unexpected column in LLVMExecutableFunction: " + col_res->getName(), ErrorCodes::LOGICAL_ERROR);
+
             std::vector<ColumnData> columns(arguments.size() + 1);
             for (size_t i = 0; i < arguments.size(); ++i)
             {
@@ -321,6 +297,7 @@ public:
             columns[arguments.size()] = getColumnData(col_res.get());
             reinterpret_cast<void (*) (size_t, ColumnData *)>(function)(block_size, columns.data());
         }
+
         block.getByPosition(result).column = std::move(col_res);
     }
 };
@@ -647,7 +624,12 @@ static std::vector<std::unordered_set<std::optional<size_t>>> getActionsDependen
     return dependents;
 }
 
-void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr<CompiledExpressionCache> compilation_cache, size_t min_count_to_compile_expression)
+void compileFunctions(
+    ExpressionActions::Actions & actions,
+    const Names & output_columns,
+    const Block & sample_block,
+    std::shared_ptr<CompiledExpressionCache> compilation_cache,
+    size_t min_count_to_compile_expression)
 {
     static std::unordered_map<UInt128, UInt32, UInt128Hash> counter;
     static std::mutex mutex;
diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h
index 4fca5b54bff..15293ac09c0 100644
--- a/dbms/src/Interpreters/ExternalDictionariesLoader.h
+++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h
@@ -21,12 +21,12 @@ public:
 
     DictPtr getDictionary(const std::string & name) const
     {
-        return std::static_pointer_cast<const IDictionaryBase>(getLoadable(name));
+        return std::static_pointer_cast<const IDictionaryBase>(load(name));
     }
 
     DictPtr tryGetDictionary(const std::string & name) const
     {
-        return std::static_pointer_cast<const IDictionaryBase>(tryGetLoadable(name));
+        return std::static_pointer_cast<const IDictionaryBase>(tryLoad(name));
     }
 
     void addConfigRepository(
diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp
index 8cc88ab5ef6..215263d8b3c 100644
--- a/dbms/src/Interpreters/ExternalLoader.cpp
+++ b/dbms/src/Interpreters/ExternalLoader.cpp
@@ -12,6 +12,7 @@
 #include <ext/scope_guard.h>
 #include <boost/range/adaptor/map.hpp>
 #include <boost/range/algorithm/copy.hpp>
+#include <unordered_set>
 
 
 namespace DB
@@ -22,21 +23,68 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
+
 namespace
 {
-
-/// Lock mutex only in async mode
-/// In other case does nothing
-struct LoadingGuardForAsyncLoad
-{
-    std::unique_lock<std::mutex> lock;
-    LoadingGuardForAsyncLoad(bool async, std::mutex & mutex)
+    template <typename ReturnType>
+    ReturnType convertTo(ExternalLoader::LoadResult result)
     {
-        if (async)
-            lock = std::unique_lock(mutex);
+        if constexpr (std::is_same_v<ReturnType, ExternalLoader::LoadResult>)
+            return result;
+        else
+        {
+            static_assert(std::is_same_v<ReturnType, ExternalLoader::LoadablePtr>);
+            return std::move(result.object);
+        }
     }
-};
 
+    /// Returns `results` as is, or (for `Loadables`) extracts the non-null loaded objects from them.
+    template <typename ReturnType>
+    ReturnType convertTo(ExternalLoader::LoadResults results)
+    {
+        if constexpr (std::is_same_v<ReturnType, ExternalLoader::LoadResults>)
+            return results;
+        else
+        {
+            static_assert(std::is_same_v<ReturnType, ExternalLoader::Loadables>);
+            ExternalLoader::Loadables objects;
+            objects.reserve(results.size());
+            for (auto & result : results) /// Non-const on purpose: `results` is our own copy, so std::move() below really moves.
+                if (auto object = std::move(result.object))
+                    objects.push_back(std::move(object));
+            return objects;
+        }
+    }
+
+    template <typename ReturnType>
+    ReturnType notExists(const String & name)
+    {
+        if constexpr (std::is_same_v<ReturnType, ExternalLoader::LoadablePtr>)
+        {
+            return nullptr; /// There is no object to return for an unknown name.
+        }
+        else
+        {
+            static_assert(std::is_same_v<ReturnType, ExternalLoader::LoadResult>);
+            ExternalLoader::LoadResult missing; /// Only the name is filled in, other fields keep their defaults.
+            missing.name = name;
+            return missing;
+        }
+    }
+
+
+    /// Guard which locks the passed mutex only if `async` is true.
+    /// Otherwise it does nothing: `lock` stays empty and the mutex is not touched.
+    struct LoadingGuardForAsyncLoad
+    {
+        std::unique_lock<std::mutex> lock; /// Owns the mutex only in async mode.
+        LoadingGuardForAsyncLoad(bool async, std::mutex & mutex)
+        {
+            if (async)
+                lock = std::unique_lock(mutex);
+        }
+    };
 }
 
 struct ExternalLoader::ObjectConfig
@@ -340,11 +388,11 @@ public:
         infos.clear(); /// We clear this map to tell the threads that we don't want any load results anymore.
 
         /// Wait for all the threads to finish.
-        while (!loading_ids.empty())
+        while (!loading_threads.empty())
         {
-            auto it = loading_ids.begin();
+            auto it = loading_threads.begin();
             auto thread = std::move(it->second);
-            loading_ids.erase(it);
+            loading_threads.erase(it);
             lock.unlock();
             event.notify_all();
             thread.join();
@@ -377,14 +425,13 @@ public:
                 if (!config_is_same)
                 {
                     /// Configuration has been changed.
-                    info.config_changed = true;
+                    info.object_config = new_config;
 
-                    if (info.wasLoading())
+                    if (info.triedToLoad())
                     {
                         /// The object has been tried to load before, so it is currently in use or was in use
                         /// and we should try to reload it with the new config.
-                        cancelLoading(info);
-                        startLoading(name, info);
+                        startLoading(info, true);
                     }
                 }
             }
@@ -395,9 +442,9 @@ public:
         {
             if (infos.find(name) == infos.end())
             {
-                Info & info = infos.emplace(name, Info{config}).first->second;
+                Info & info = infos.emplace(name, Info{name, config}).first->second;
                 if (always_load_everything)
-                    startLoading(name, info);
+                    startLoading(info);
             }
         }
 
@@ -424,8 +471,8 @@ public:
         {
             /// Start loading all the objects which were not loaded yet.
             for (auto & [name, info] : infos)
-                if (!info.wasLoading())
-                    startLoading(name, info);
+                if (!info.triedToLoad())
+                    startLoading(info);
         }
     }
 
@@ -448,35 +495,25 @@ public:
     }
 
     /// Returns the load result of the object.
-    LoadResult getCurrentLoadResult(const String & name) const
+    template <typename ReturnType>
+    ReturnType getCurrentLoadResult(const String & name) const
     {
         std::lock_guard lock{mutex};
         const Info * info = getInfo(name);
         if (!info)
-            return {Status::NOT_EXIST};
-        return info->loadResult();
+            return notExists<ReturnType>(name);
+        return info->getLoadResult<ReturnType>();
     }
 
     /// Returns all the load results as a map.
     /// The function doesn't load anything, it just returns the current load results as is.
-    LoadResults getCurrentLoadResults(const FilterByNameFunction & filter_by_name) const
+    template <typename ReturnType>
+    ReturnType getCurrentLoadResults(const FilterByNameFunction & filter) const
     {
         std::lock_guard lock{mutex};
-        return collectLoadResults(filter_by_name);
+        return collectLoadResults<ReturnType>(filter);
     }
 
-    LoadResults getCurrentLoadResults() const { return getCurrentLoadResults(allNames); }
-
-    /// Returns all the loaded objects as a map.
-    /// The function doesn't load anything, it just returns the current load results as is.
-    Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const
-    {
-        std::lock_guard lock{mutex};
-        return collectLoadedObjects(filter_by_name);
-    }
-
-    Loadables getCurrentlyLoadedObjects() const { return getCurrentlyLoadedObjects(allNames); }
-
     size_t getNumberOfCurrentlyLoadedObjects() const
     {
         std::lock_guard lock{mutex};
@@ -499,92 +536,53 @@ public:
         return false;
     }
 
+    Strings getAllTriedToLoadNames() const
+    {
+        std::lock_guard lock{mutex}; /// `infos` is changed under `mutex` by other methods, so it must be read under it too.
+        Strings names;
+        for (auto & [name, info] : infos)
+            if (info.triedToLoad())
+                names.push_back(name);
+        return names;
+    }
     /// Tries to load a specified object during the timeout.
-    /// Returns nullptr if the loading is unsuccessful or if there is no such object.
-    void load(const String & name, LoadablePtr & loaded_object, Duration timeout = NO_TIMEOUT)
+    template <typename ReturnType>
+    ReturnType tryLoad(const String & name, Duration timeout)
     {
         std::unique_lock lock{mutex};
-        Info * info = loadImpl(name, timeout, lock);
-        loaded_object = (info ? info->object : nullptr);
-    }
-
-    /// Tries to finish loading of a specified object during the timeout.
-    /// Returns nullptr if the loading is unsuccessful or if there is no such object.
-    void loadStrict(const String & name, LoadablePtr & loaded_object)
-    {
-        std::unique_lock lock{mutex};
-        Info * info = loadImpl(name, NO_TIMEOUT, lock);
+        Info * info = loadImpl(name, timeout, false, lock);
         if (!info)
-            throw Exception("No such " + type_name + " '" + name + "'.", ErrorCodes::BAD_ARGUMENTS);
-        checkLoaded(name, *info);
-        loaded_object = info->object;
+            return notExists<ReturnType>(name);
+        return info->getLoadResult<ReturnType>();
     }
 
-    /// Tries to start loading of the objects for which the specified functor returns true.
-    void load(const FilterByNameFunction & filter_by_name)
-    {
-        std::lock_guard lock{mutex};
-        for (auto & [name, info] : infos)
-            if (!info.wasLoading() && filter_by_name(name))
-                startLoading(name, info);
-    }
-
-    /// Tries to finish loading of the objects for which the specified function returns true.
-    void load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout = NO_TIMEOUT)
+    template <typename ReturnType>
+    ReturnType tryLoad(const FilterByNameFunction & filter, Duration timeout)
     {
         std::unique_lock lock{mutex};
-        loadImpl(filter_by_name, timeout, lock);
-        loaded_objects = collectLoadedObjects(filter_by_name);
+        loadImpl(filter, timeout, false, lock);
+        return collectLoadResults<ReturnType>(filter);
     }
 
-    /// Tries to finish loading of the objects for which the specified function returns true.
-    void load(const FilterByNameFunction & filter_by_name, LoadResults & loaded_results, Duration timeout = NO_TIMEOUT)
+    /// Tries to load or reload a specified object.
+    template <typename ReturnType>
+    ReturnType tryLoadOrReload(const String & name, Duration timeout)
     {
         std::unique_lock lock{mutex};
-        loadImpl(filter_by_name, timeout, lock);
-        loaded_results = collectLoadResults(filter_by_name);
-    }
-
-    /// Tries to finish loading of all the objects during the timeout.
-    void load(Loadables & loaded_objects, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_objects, timeout); }
-    void load(LoadResults & loaded_results, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_results, timeout); }
-
-    /// Starts reloading a specified object.
-    void reload(const String & name, bool load_never_loading = false)
-    {
-        std::lock_guard lock{mutex};
-        Info * info = getInfo(name);
+        Info * info = loadImpl(name, timeout, true, lock);
         if (!info)
-        {
-            return;
-        }
-
-        if (info->wasLoading() || load_never_loading)
-        {
-            cancelLoading(*info);
-            info->forced_to_reload = true;
-            startLoading(name, *info);
-        }
+            return notExists<ReturnType>(name);
+        return info->getLoadResult<ReturnType>();
     }
 
-    /// Starts reloading of the objects which `filter_by_name` returns true for.
-    void reload(const FilterByNameFunction & filter_by_name, bool load_never_loading = false)
+    template <typename ReturnType>
+    ReturnType tryLoadOrReload(const FilterByNameFunction & filter, Duration timeout)
     {
-        std::lock_guard lock{mutex};
-        for (auto & [name, info] : infos)
-        {
-            if ((info.wasLoading() || load_never_loading) && filter_by_name(name))
-            {
-                cancelLoading(info);
-                info.forced_to_reload = true;
-                startLoading(name, info);
-            }
-        }
+        std::unique_lock lock{mutex};
+        loadImpl(filter, timeout, true, lock);
+        return collectLoadResults<ReturnType>(filter);
     }
 
-    /// Starts reloading of all the objects.
-    void reload(bool load_never_loading = false) { reload(allNames, load_never_loading); }
-
     /// Starts reloading all the object which update time is earlier than now.
     /// The function doesn't touch the objects which were never tried to load.
     void reloadOutdated()
@@ -597,7 +595,7 @@ public:
             for (const auto & name_and_info : infos)
             {
                 const auto & info = name_and_info.second;
-                if ((now >= info.next_update_time) && !info.loading() && info.loaded())
+                if ((now >= info.next_update_time) && !info.is_loading() && info.loaded())
                     should_update_map.emplace(info.object, info.failedToReload());
             }
         }
@@ -627,7 +625,7 @@ public:
             TimePoint now = std::chrono::system_clock::now();
             for (auto & [name, info] : infos)
             {
-                if ((now >= info.next_update_time) && !info.loading())
+                if ((now >= info.next_update_time) && !info.is_loading())
                 {
                     if (info.loaded())
                     {
@@ -643,12 +641,12 @@ public:
                         }
 
                         /// Object was modified or it was failed to reload last time, so it should be reloaded.
-                        startLoading(name, info);
+                        startLoading(info);
                     }
                     else if (info.failed())
                     {
                         /// Object was never loaded successfully and should be reloaded.
-                        startLoading(name, info);
+                        startLoading(info);
                     }
                 }
             }
@@ -658,53 +656,64 @@ public:
 private:
     struct Info
     {
-        Info(const ObjectConfig & object_config_) : object_config(object_config_) {}
+        Info(const String & name_, const ObjectConfig & object_config_) : name(name_), object_config(object_config_) {}
 
         bool loaded() const { return object != nullptr; }
         bool failed() const { return !object && exception; }
-        bool loading() const { return loading_id != 0; }
-        bool wasLoading() const { return loaded() || failed() || loading(); }
-        bool ready() const { return (loaded() || failed()) && !forced_to_reload; }
+        bool loadedOrFailed() const { return loaded() || failed(); }
+        bool triedToLoad() const { return loaded() || failed() || is_loading(); }
         bool failedToReload() const { return loaded() && exception != nullptr; }
+        bool is_loading() const { return loading_id > state_id; }
 
         Status status() const
         {
             if (object)
-                return loading() ? Status::LOADED_AND_RELOADING : Status::LOADED;
+                return is_loading() ? Status::LOADED_AND_RELOADING : Status::LOADED;
             else if (exception)
-                return loading() ? Status::FAILED_AND_RELOADING : Status::FAILED;
+                return is_loading() ? Status::FAILED_AND_RELOADING : Status::FAILED;
             else
-                return loading() ? Status::LOADING : Status::NOT_LOADED;
+                return is_loading() ? Status::LOADING : Status::NOT_LOADED;
         }
 
         Duration loadingDuration() const
         {
-            if (loading())
+            if (is_loading())
                 return std::chrono::duration_cast<Duration>(std::chrono::system_clock::now() - loading_start_time);
             return std::chrono::duration_cast<Duration>(loading_end_time - loading_start_time);
         }
 
-        LoadResult loadResult() const
+        template <typename ReturnType>
+        ReturnType getLoadResult() const
         {
-            LoadResult result{status()};
-            result.object = object;
-            result.exception = exception;
-            result.loading_start_time = loading_start_time;
-            result.loading_duration = loadingDuration();
-            result.origin = object_config.path;
-            result.repository_name = object_config.repository_name;
-            return result;
+            if constexpr (std::is_same_v<ReturnType, LoadResult>)
+            {
+                LoadResult result;
+                result.name = name;
+                result.status = status();
+                result.object = object;
+                result.exception = exception;
+                result.loading_start_time = loading_start_time;
+                result.loading_duration = loadingDuration();
+                result.origin = object_config.path;
+                result.repository_name = object_config.repository_name;
+                return result;
+            }
+            else
+            {
+                static_assert(std::is_same_v<ReturnType, ExternalLoader::LoadablePtr>);
+                return object;
+            }
         }
 
-        ObjectConfig object_config;
+        String name;
         LoadablePtr object;
+        ObjectConfig object_config;
         TimePoint loading_start_time;
         TimePoint loading_end_time;
-        size_t loading_id = 0; /// Non-zero if it's loading right now.
+        size_t state_id = 0; /// Index of the current state of this `info`, this index is incremented every loading.
+        size_t loading_id = 0; /// The value which will be stored in `state_id` after finishing the current loading.
         size_t error_count = 0; /// Numbers of errors since last successful loading.
         std::exception_ptr exception; /// Last error occurred.
-        bool config_changed = false; /// Whether the config has been change since last successful loading.
-        bool forced_to_reload = false; /// Whether the current reloading is forced, i.e. caused by user's direction. For periodic reloading and reloading due to a config's change `forced_to_reload == false`.
         TimePoint next_update_time = TimePoint::max(); /// Time of the next update, `TimePoint::max()` means "never".
     };
 
@@ -724,42 +733,49 @@ private:
         return &it->second;
     }
 
-    Loadables collectLoadedObjects(const FilterByNameFunction & filter_by_name) const
+    template <typename ReturnType>
+    ReturnType collectLoadResults(const FilterByNameFunction & filter) const
     {
-        Loadables objects;
-        objects.reserve(infos.size());
-        for (const auto & [name, info] : infos)
-            if (info.loaded() && filter_by_name(name))
-                objects.emplace_back(info.object);
-        return objects;
-    }
-
-    LoadResults collectLoadResults(const FilterByNameFunction & filter_by_name) const
-    {
-        LoadResults load_results;
-        load_results.reserve(infos.size());
+        ReturnType results;
+        results.reserve(infos.size());
         for (const auto & [name, info] : infos)
         {
-            if (filter_by_name(name))
-                load_results.emplace_back(name, info.loadResult());
+            if (filter(name))
+            {
+                auto result = info.template getLoadResult<typename ReturnType::value_type>();
+                if constexpr (std::is_same_v<typename ReturnType::value_type, LoadablePtr>)
+                {
+                    if (!result)
+                        continue;
+                }
+                results.emplace_back(std::move(result));
+            }
         }
-        return load_results;
+        return results;
     }
 
-    Info * loadImpl(const String & name, Duration timeout, std::unique_lock<std::mutex> & lock)
+    Info * loadImpl(const String & name, Duration timeout, bool forced_to_reload, std::unique_lock<std::mutex> & lock)
     {
-        Info * info;
-        auto pred = [&]()
+        std::optional<size_t> min_id;
+        Info * info = nullptr;
+        auto pred = [&]
         {
             info = getInfo(name);
-            if (!info || info->ready())
-                return true;
-            if (!info->loading())
-                startLoading(name, *info);
-            return info->ready();
+            if (!info)
+                return true; /// stop
+
+            if (!min_id)
+                min_id = getMinIDToFinishLoading(forced_to_reload);
+
+            if (info->state_id >= min_id)
+                return true; /// stop
+
+            if (info->loading_id < min_id)
+                startLoading(*info, forced_to_reload, *min_id);
+            return false; /// wait for the next event
         };
 
-        if (timeout == NO_TIMEOUT)
+        if (timeout == WAIT)
             event.wait(lock, pred);
         else
             event.wait_for(lock, timeout, pred);
@@ -767,36 +783,69 @@ private:
         return info;
     }
 
-    void loadImpl(const FilterByNameFunction & filter_by_name, Duration timeout, std::unique_lock<std::mutex> & lock)
+    void loadImpl(const FilterByNameFunction & filter, Duration timeout, bool forced_to_reload, std::unique_lock<std::mutex> & lock)
     {
-        auto pred = [&]()
+        std::optional<size_t> min_id;
+        auto pred = [&]
         {
+            if (!min_id)
+                min_id = getMinIDToFinishLoading(forced_to_reload);
+
             bool all_ready = true;
             for (auto & [name, info] : infos)
             {
-                if (info.ready() || !filter_by_name(name))
+                if (!filter(name))
                     continue;
-                if (!info.loading())
-                    startLoading(name, info);
-                if (!info.ready())
-                    all_ready = false;
+
+                if (info.state_id >= min_id)
+                    continue;
+
+                all_ready = false;
+                if (info.loading_id < min_id)
+                    startLoading(info, forced_to_reload, *min_id);
             }
             return all_ready;
         };
 
-        if (timeout == NO_TIMEOUT)
+        if (timeout == WAIT)
             event.wait(lock, pred);
         else
             event.wait_for(lock, timeout, pred);
     }
 
-    void startLoading(const String & name, Info & info)
+    /// When state_id >= getMinIDToFinishLoading() the loading is considered as finished.
+    size_t getMinIDToFinishLoading(bool forced_to_reload) const
     {
-        if (info.loading())
-            return;
+        if (forced_to_reload)
+        {
+            /// We need to force reloading, that's why we return next_id_counter here
+            /// (because info.state_id < next_id_counter for any info).
+            return next_id_counter;
+        }
+
+        /// The loading of an object can cause the loading of another object.
+        /// We use the same "min_id" in this case to allow reloading multiple objects at once
+        /// taking into account their dependencies.
+        auto it = min_id_to_finish_loading_dependencies.find(std::this_thread::get_id());
+        if (it != min_id_to_finish_loading_dependencies.end())
+            return it->second;
+
+        /// We just need the first loading to be finished, that's why we return 1 here
+        /// (because info.state_id >= 1 since the first loading is finished, successfully or not).
+        return 1;
+    }
+
+    void startLoading(Info & info, bool forced_to_reload = false, size_t min_id_to_finish_loading_dependencies_ = 1)
+    {
+        if (info.is_loading())
+        {
+            if (!forced_to_reload)
+                return;
+            cancelLoading(info);
+        }
 
         /// All loadings have unique loading IDs.
-        size_t loading_id = next_loading_id++;
+        size_t loading_id = next_id_counter++;
         info.loading_id = loading_id;
         info.loading_start_time = std::chrono::system_clock::now();
         info.loading_end_time = TimePoint{};
@@ -804,24 +853,88 @@ private:
         if (enable_async_loading)
         {
             /// Put a job to the thread pool for the loading.
-            auto thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, name, loading_id, true};
-            loading_ids.try_emplace(loading_id, std::move(thread));
+            auto thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, info.name, loading_id, forced_to_reload, min_id_to_finish_loading_dependencies_, true};
+            loading_threads.try_emplace(loading_id, std::move(thread));
         }
         else
         {
             /// Perform the loading immediately.
-            doLoading(name, loading_id, false);
+            doLoading(info.name, loading_id, forced_to_reload, min_id_to_finish_loading_dependencies_, false);
         }
     }
 
-    /// Load one object, returns object ptr or exception
-    /// Do not require locking
-
-    std::pair<LoadablePtr, std::exception_ptr> loadOneObject(
-        const String & name,
-        const ObjectConfig & config,
-        LoadablePtr previous_version)
+    void cancelLoading(Info & info)
     {
+        if (!info.is_loading())
+            return;
+
+        /// In fact we cannot actually CANCEL the loading (because it's possibly already being performed in another thread).
+        /// But we can reset the `loading_id` and doLoading() will understand it as a signal to stop loading.
+        info.loading_id = info.state_id;
+        info.loading_end_time = std::chrono::system_clock::now();
+    }
+
+    /// Does the loading, possibly in the separate thread.
+    void doLoading(const String & name, size_t loading_id, bool forced_to_reload, size_t min_id_to_finish_loading_dependencies_, bool async)
+    {
+        try
+        {
+            /// Prepare for loading.
+            std::optional<Info> info;
+            {
+                LoadingGuardForAsyncLoad lock(async, mutex);
+                info = prepareToLoadSingleObject(name, loading_id, min_id_to_finish_loading_dependencies_, lock);
+                if (!info)
+                    return;
+            }
+
+            /// Previous version can be used as the base for new loading, enabling loading only part of data.
+            auto previous_version_as_base_for_loading = info->object;
+            if (forced_to_reload)
+                previous_version_as_base_for_loading = nullptr; /// Need complete reloading, cannot use the previous version.
+
+            /// Loading.
+            auto [new_object, new_exception] = loadSingleObject(name, info->object_config, previous_version_as_base_for_loading);
+            if (!new_object && !new_exception)
+                throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR);
+
+            /// Saving the result of the loading.
+            {
+                LoadingGuardForAsyncLoad lock(async, mutex);
+                saveResultOfLoadingSingleObject(name, loading_id, info->object, new_object, new_exception, info->error_count, lock);
+                finishLoadingSingleObject(name, loading_id, lock);
+            }
+            event.notify_all();
+        }
+        catch (...)
+        {
+            LoadingGuardForAsyncLoad lock(async, mutex);
+            finishLoadingSingleObject(name, loading_id, lock);
+            throw;
+        }
+    }
+
+    /// Returns single object info, checks loading_id and name.
+    std::optional<Info> prepareToLoadSingleObject(
+        const String & name, size_t loading_id, size_t min_id_to_finish_loading_dependencies_, const LoadingGuardForAsyncLoad &)
+    {
+        Info * info = getInfo(name);
+        /// We check here if this is exactly the same loading as we planned to perform.
+        /// This check is necessary because the object could be removed or loaded with another config before this thread even starts.
+        if (!info || !info->is_loading() || (info->loading_id != loading_id))
+            return {};
+
+        min_id_to_finish_loading_dependencies[std::this_thread::get_id()] = min_id_to_finish_loading_dependencies_;
+        return *info;
+    }
+
+    /// Load one object, returns object ptr or exception.
+    std::pair<LoadablePtr, std::exception_ptr>
+    loadSingleObject(const String & name, const ObjectConfig & config, LoadablePtr previous_version)
+    {
+        /// Use `create_function` to perform the actual loading.
+        /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time
+        /// and require access to other objects.
         LoadablePtr new_object;
         std::exception_ptr new_exception;
         try
@@ -833,44 +946,18 @@ private:
             new_exception = std::current_exception();
         }
         return std::make_pair(new_object, new_exception);
-
     }
 
-    /// Return single object info, checks loading_id and name
-    std::optional<Info> getSingleObjectInfo(const String & name, size_t loading_id, bool async)
-    {
-        LoadingGuardForAsyncLoad lock(async, mutex);
-        Info * info = getInfo(name);
-        if (!info || !info->loading() || (info->loading_id != loading_id))
-            return {};
-
-        return *info;
-    }
-
-    /// Removes object loading_id from loading_ids if it present
-    /// in other case do nothin should by done with lock
-    void finishObjectLoading(size_t loading_id, const LoadingGuardForAsyncLoad &)
-    {
-        auto it = loading_ids.find(loading_id);
-        if (it != loading_ids.end())
-        {
-            it->second.detach();
-            loading_ids.erase(it);
-        }
-    }
-
-    /// Process loading result
-    /// Calculates next update time and process errors
-    void processLoadResult(
+    /// Saves the result of the loading, calculates the time of the next update, and handles errors.
+    void saveResultOfLoadingSingleObject(
         const String & name,
         size_t loading_id,
         LoadablePtr previous_version,
         LoadablePtr new_object,
         std::exception_ptr new_exception,
         size_t error_count,
-        bool async)
+        const LoadingGuardForAsyncLoad &)
     {
-        LoadingGuardForAsyncLoad lock(async, mutex);
         /// Calculate a new update time.
         TimePoint next_update_time;
         try
@@ -895,9 +982,9 @@ private:
 
         Info * info = getInfo(name);
 
-        /// And again we should check if this is still the same loading as we were doing.
+        /// We should check if this is still the same loading as we were doing.
         /// This is necessary because the object could be removed or load with another config while the `mutex` was unlocked.
-        if (!info || !info->loading() || (info->loading_id != loading_id))
+        if (!info || !info->is_loading() || (info->loading_id != loading_id))
             return;
 
         if (new_exception)
@@ -921,79 +1008,27 @@ private:
         info->exception = new_exception;
         info->error_count = error_count;
         info->loading_end_time = std::chrono::system_clock::now();
-        info->loading_id = 0;
+        info->state_id = info->loading_id;
         info->next_update_time = next_update_time;
-
-        info->forced_to_reload = false;
-        if (new_object)
-            info->config_changed = false;
-
-        finishObjectLoading(loading_id, lock);
     }
 
-
-    /// Does the loading, possibly in the separate thread.
-    void doLoading(const String & name, size_t loading_id, bool async)
-    {
-        try
-        {
-            /// We check here if this is exactly the same loading as we planned to perform.
-            /// This check is necessary because the object could be removed or load with another config before this thread even starts.
-            std::optional<Info> info = getSingleObjectInfo(name, loading_id, async);
-            if (!info)
-                return;
-
-            /// Use `create_function` to perform the actual loading.
-            /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time
-            /// and require access to other objects.
-            bool need_complete_loading = !info->object || info->config_changed || info->forced_to_reload;
-            auto [new_object, new_exception] = loadOneObject(name, info->object_config, need_complete_loading ? nullptr : info->object);
-            if (!new_object && !new_exception)
-                throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR);
-
-
-            processLoadResult(name, loading_id, info->object, new_object, new_exception, info->error_count, async);
-            event.notify_all();
-        }
-        catch (...)
-        {
-            LoadingGuardForAsyncLoad lock(async, mutex);
-            finishObjectLoading(loading_id, lock);
-            throw;
-        }
-    }
-
-    void cancelLoading(const String & name)
+    /// Removes the references to the loading thread from the maps.
+    void finishLoadingSingleObject(const String & name, size_t loading_id, const LoadingGuardForAsyncLoad &)
     {
         Info * info = getInfo(name);
-        if (info)
-            cancelLoading(*info);
+        if (info && (info->loading_id == loading_id))
+            info->loading_id = info->state_id;
+
+        min_id_to_finish_loading_dependencies.erase(std::this_thread::get_id());
+
+        auto it = loading_threads.find(loading_id);
+        if (it != loading_threads.end())
+        {
+            it->second.detach();
+            loading_threads.erase(it);
+        }
     }
 
-    void cancelLoading(Info & info)
-    {
-        if (!info.loading())
-            return;
-
-        /// In fact we cannot actually CANCEL the loading (because it's possibly already being performed in another thread).
-        /// But we can reset the `loading_id` and doLoading() will understand it as a signal to stop loading.
-        info.loading_id = 0;
-        info.loading_end_time = std::chrono::system_clock::now();
-    }
-
-    void checkLoaded(const String & name, const Info & info)
-    {
-        if (info.loaded())
-            return;
-        if (info.loading())
-            throw Exception(type_name + " '" + name + "' is still loading.", ErrorCodes::BAD_ARGUMENTS);
-        if (info.failed())
-            std::rethrow_exception(info.exception);
-    }
-
-    /// Filter by name which matches everything.
-    static bool allNames(const String &) { return true; }
-
     /// Calculate next update time for loaded_object. Can be called without mutex locking,
     /// because single loadable can be loaded in single thread only.
     TimePoint calculateNextUpdateTime(const LoadablePtr & loaded_object, size_t error_count) const
@@ -1030,8 +1065,9 @@ private:
     std::unordered_map<String, Info> infos;
     bool always_load_everything = false;
     std::atomic<bool> enable_async_loading = false;
-    std::unordered_map<size_t, ThreadFromGlobalPool> loading_ids;
-    size_t next_loading_id = 1; /// should always be > 0
+    std::unordered_map<size_t, ThreadFromGlobalPool> loading_threads;
+    std::unordered_map<std::thread::id, size_t> min_id_to_finish_loading_dependencies;
+    size_t next_id_counter = 1; /// should always be > 0
     mutable pcg64 rnd_engine{randomSeed()};
 };
 
@@ -1101,14 +1137,15 @@ private:
 };
 
 
-ExternalLoader::ExternalLoader(const String & type_name_, Logger * log)
-    : config_files_reader(std::make_unique<LoadablesConfigReader>(type_name_, log))
+ExternalLoader::ExternalLoader(const String & type_name_, Logger * log_)
+    : config_files_reader(std::make_unique<LoadablesConfigReader>(type_name_, log_))
     , loading_dispatcher(std::make_unique<LoadingDispatcher>(
           std::bind(&ExternalLoader::createObject, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3),
           type_name_,
-          log))
+          log_))
     , periodic_updater(std::make_unique<PeriodicUpdater>(*config_files_reader, *loading_dispatcher))
     , type_name(type_name_)
+    , log(log_)
 {
 }
 
@@ -1155,29 +1192,26 @@ ExternalLoader::Status ExternalLoader::getCurrentStatus(const String & name) con
     return loading_dispatcher->getCurrentStatus(name);
 }
 
-ExternalLoader::LoadResult ExternalLoader::getCurrentLoadResult(const String & name) const
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::getCurrentLoadResult(const String & name) const
 {
-    return loading_dispatcher->getCurrentLoadResult(name);
+    return loading_dispatcher->getCurrentLoadResult<ReturnType>(name);
 }
 
-ExternalLoader::LoadResults ExternalLoader::getCurrentLoadResults() const
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::getCurrentLoadResults(const FilterByNameFunction & filter) const
 {
-    return loading_dispatcher->getCurrentLoadResults();
-}
-
-ExternalLoader::LoadResults ExternalLoader::getCurrentLoadResults(const FilterByNameFunction & filter_by_name) const
-{
-    return loading_dispatcher->getCurrentLoadResults(filter_by_name);
+    return loading_dispatcher->getCurrentLoadResults<ReturnType>(filter);
 }
 
 ExternalLoader::Loadables ExternalLoader::getCurrentlyLoadedObjects() const
 {
-    return loading_dispatcher->getCurrentlyLoadedObjects();
+    return getCurrentLoadResults<Loadables>();
 }
 
-ExternalLoader::Loadables ExternalLoader::getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const
+ExternalLoader::Loadables ExternalLoader::getCurrentlyLoadedObjects(const FilterByNameFunction & filter) const
 {
-    return loading_dispatcher->getCurrentlyLoadedObjects(filter_by_name);
+    return getCurrentLoadResults<Loadables>(filter);
 }
 
 size_t ExternalLoader::getNumberOfCurrentlyLoadedObjects() const
@@ -1185,56 +1219,104 @@ size_t ExternalLoader::getNumberOfCurrentlyLoadedObjects() const
     return loading_dispatcher->getNumberOfCurrentlyLoadedObjects();
 }
 
-void ExternalLoader::load(const String & name, LoadablePtr & loaded_object, Duration timeout) const
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::tryLoad(const String & name, Duration timeout) const
 {
-    loading_dispatcher->load(name, loaded_object, timeout);
+    return loading_dispatcher->tryLoad<ReturnType>(name, timeout);
 }
 
-void ExternalLoader::loadStrict(const String & name, LoadablePtr & loaded_object) const
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::tryLoad(const FilterByNameFunction & filter, Duration timeout) const
 {
-    loading_dispatcher->loadStrict(name, loaded_object);
+    return loading_dispatcher->tryLoad<ReturnType>(filter, timeout);
 }
 
-void ExternalLoader::load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout) const
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::load(const String & name) const
 {
-    if (filter_by_name)
-        loading_dispatcher->load(filter_by_name, loaded_objects, timeout);
-    else
-        loading_dispatcher->load(loaded_objects, timeout);
+    auto result = tryLoad<LoadResult>(name);
+    checkLoaded(result, false);
+    return convertTo<ReturnType>(result);
+}
+
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::load(const FilterByNameFunction & filter) const
+{
+    auto results = tryLoad<LoadResults>(filter);
+    checkLoaded(results, false);
+    return convertTo<ReturnType>(results);
+}
+
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::loadOrReload(const String & name) const
+{
+    loading_dispatcher->setConfiguration(config_files_reader->read());
+    auto result = loading_dispatcher->tryLoadOrReload<LoadResult>(name, WAIT);
+    checkLoaded(result, true);
+    return convertTo<ReturnType>(result);
+}
+
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::loadOrReload(const FilterByNameFunction & filter) const
+{
+    loading_dispatcher->setConfiguration(config_files_reader->read());
+    auto results = loading_dispatcher->tryLoadOrReload<LoadResults>(filter, WAIT);
+    checkLoaded(results, true);
+    return convertTo<ReturnType>(results);
+}
+
+template <typename ReturnType, typename>
+ReturnType ExternalLoader::reloadAllTriedToLoad() const
+{
+    std::unordered_set<String> names;
+    boost::range::copy(getAllTriedToLoadNames(), std::inserter(names, names.end()));
+    return loadOrReload<ReturnType>([&names](const String & name) { return names.count(name); });
+}
+
+Strings ExternalLoader::getAllTriedToLoadNames() const
+{
+    return loading_dispatcher->getAllTriedToLoadNames();
 }
 
 
-void ExternalLoader::load(const FilterByNameFunction & filter_by_name, LoadResults & loaded_objects, Duration timeout) const
+void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result,
+                                 bool check_no_errors) const
 {
-    if (filter_by_name)
-        loading_dispatcher->load(filter_by_name, loaded_objects, timeout);
-    else
-        loading_dispatcher->load(loaded_objects, timeout);
+    if (result.object && (!check_no_errors || !result.exception))
+        return;
+    if (result.status == ExternalLoader::Status::LOADING)
+        throw Exception(type_name + " '" + result.name + "' is still loading", ErrorCodes::BAD_ARGUMENTS);
+    if (result.exception)
+        std::rethrow_exception(result.exception);
+    if (result.status == ExternalLoader::Status::NOT_EXIST)
+        throw Exception(type_name + " '" + result.name + "' not found", ErrorCodes::BAD_ARGUMENTS);
+    if (result.status == ExternalLoader::Status::NOT_LOADED)
+        throw Exception(type_name + " '" + result.name + "' not tried to load", ErrorCodes::BAD_ARGUMENTS);
 }
 
-
-void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const
+void ExternalLoader::checkLoaded(const ExternalLoader::LoadResults & results,
+                                 bool check_no_errors) const
 {
-    return loading_dispatcher->load(loaded_objects, timeout);
+    std::exception_ptr exception;
+    for (const auto & result : results)
+    {
+        try
+        {
+            checkLoaded(result, check_no_errors);
+        }
+        catch (...)
+        {
+            if (!exception)
+                exception = std::current_exception();
+            else
+                tryLogCurrentException(log);
+        }
+    }
+
+    if (exception)
+        std::rethrow_exception(exception);
 }
 
-void ExternalLoader::reload(const String & name, bool load_never_loading) const
-{
-    reloadConfig();
-    loading_dispatcher->reload(name, load_never_loading);
-}
-
-void ExternalLoader::reload(bool load_never_loading) const
-{
-    reloadConfig();
-    loading_dispatcher->reload(load_never_loading);
-}
-
-void ExternalLoader::reload(const FilterByNameFunction & filter_by_name, bool load_never_loading) const
-{
-    reloadConfig();
-    loading_dispatcher->reload(filter_by_name, load_never_loading);
-}
 
 void ExternalLoader::reloadConfig() const
 {
@@ -1296,4 +1378,27 @@ std::ostream & operator<<(std::ostream & out, ExternalLoader::Status status)
     return out << toString(status);
 }
 
+
+template ExternalLoader::LoadablePtr ExternalLoader::getCurrentLoadResult<ExternalLoader::LoadablePtr>(const String &) const;
+template ExternalLoader::LoadResult ExternalLoader::getCurrentLoadResult<ExternalLoader::LoadResult>(const String &) const;
+template ExternalLoader::Loadables ExternalLoader::getCurrentLoadResults<ExternalLoader::Loadables>(const FilterByNameFunction &) const;
+template ExternalLoader::LoadResults ExternalLoader::getCurrentLoadResults<ExternalLoader::LoadResults>(const FilterByNameFunction &) const;
+
+template ExternalLoader::LoadablePtr ExternalLoader::tryLoad<ExternalLoader::LoadablePtr>(const String &, Duration) const;
+template ExternalLoader::LoadResult ExternalLoader::tryLoad<ExternalLoader::LoadResult>(const String &, Duration) const;
+template ExternalLoader::Loadables ExternalLoader::tryLoad<ExternalLoader::Loadables>(const FilterByNameFunction &, Duration) const;
+template ExternalLoader::LoadResults ExternalLoader::tryLoad<ExternalLoader::LoadResults>(const FilterByNameFunction &, Duration) const;
+
+template ExternalLoader::LoadablePtr ExternalLoader::load<ExternalLoader::LoadablePtr>(const String &) const;
+template ExternalLoader::LoadResult ExternalLoader::load<ExternalLoader::LoadResult>(const String &) const;
+template ExternalLoader::Loadables ExternalLoader::load<ExternalLoader::Loadables>(const FilterByNameFunction &) const;
+template ExternalLoader::LoadResults ExternalLoader::load<ExternalLoader::LoadResults>(const FilterByNameFunction &) const;
+
+template ExternalLoader::LoadablePtr ExternalLoader::loadOrReload<ExternalLoader::LoadablePtr>(const String &) const;
+template ExternalLoader::LoadResult ExternalLoader::loadOrReload<ExternalLoader::LoadResult>(const String &) const;
+template ExternalLoader::Loadables ExternalLoader::loadOrReload<ExternalLoader::Loadables>(const FilterByNameFunction &) const;
+template ExternalLoader::LoadResults ExternalLoader::loadOrReload<ExternalLoader::LoadResults>(const FilterByNameFunction &) const;
+
+template ExternalLoader::Loadables ExternalLoader::reloadAllTriedToLoad<ExternalLoader::Loadables>() const;
+template ExternalLoader::LoadResults ExternalLoader::reloadAllTriedToLoad<ExternalLoader::LoadResults>() const;
 }
diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h
index 71d5af71db2..9ccdc4bf39c 100644
--- a/dbms/src/Interpreters/ExternalLoader.h
+++ b/dbms/src/Interpreters/ExternalLoader.h
@@ -65,8 +65,8 @@ public:
 
     struct LoadResult
     {
-        LoadResult(Status status_) : status(status_) {}
-        Status status;
+        Status status = Status::NOT_EXIST;
+        String name;
         LoadablePtr object;
         String origin;
         TimePoint loading_start_time;
@@ -75,7 +75,13 @@ public:
         std::string repository_name;
     };
 
-    using LoadResults = std::vector<std::pair<String, LoadResult>>;
+    using LoadResults = std::vector<LoadResult>;
+
+    template <typename T>
+    static constexpr bool is_scalar_load_result_type = std::is_same_v<T, LoadResult> || std::is_same_v<T, LoadablePtr>;
+
+    template <typename T>
+    static constexpr bool is_vector_load_result_type = std::is_same_v<T, LoadResults> || std::is_same_v<T, Loadables>;
 
     ExternalLoader(const String & type_name_, Logger * log);
     virtual ~ExternalLoader();
@@ -105,63 +111,88 @@ public:
 
     /// Returns the result of loading the object.
     /// The function doesn't load anything, it just returns the current load result as is.
-    LoadResult getCurrentLoadResult(const String & name) const;
+    template <typename ReturnType = LoadResult, typename = std::enable_if_t<is_scalar_load_result_type<ReturnType>, void>>
+    ReturnType getCurrentLoadResult(const String & name) const;
 
     using FilterByNameFunction = std::function<bool(const String &)>;
 
     /// Returns all the load results as a map.
     /// The function doesn't load anything, it just returns the current load results as is.
-    LoadResults getCurrentLoadResults() const;
-    LoadResults getCurrentLoadResults(const FilterByNameFunction & filter_by_name) const;
+    template <typename ReturnType = LoadResults, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType getCurrentLoadResults() const { return getCurrentLoadResults<ReturnType>(alwaysTrue); }
+
+    template <typename ReturnType = LoadResults, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType getCurrentLoadResults(const FilterByNameFunction & filter) const;
 
     /// Returns all loaded objects as a map.
     /// The function doesn't load anything, it just returns the current load results as is.
     Loadables getCurrentlyLoadedObjects() const;
-    Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const;
-    size_t getNumberOfCurrentlyLoadedObjects() const;
+    Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter) const;
 
     /// Returns true if any object was loaded.
     bool hasCurrentlyLoadedObjects() const;
+    size_t getNumberOfCurrentlyLoadedObjects() const;
 
-    static constexpr Duration NO_TIMEOUT = Duration::max();
+    static constexpr Duration NO_WAIT = Duration::zero();
+    static constexpr Duration WAIT = Duration::max();
 
-    /// Tries to finish loading of a specified object during the timeout.
-    /// Returns nullptr if the loading is unsuccessful or if there is no such object.
-    void load(const String & name, LoadablePtr & loaded_object, Duration timeout = NO_TIMEOUT) const;
-    void load(const String & name) const { LoadablePtr object; load(name, object, Duration::zero()); }
-    LoadablePtr loadAndGet(const String & name, Duration timeout = NO_TIMEOUT) const { LoadablePtr object; load(name, object, timeout); return object; }
-    LoadablePtr tryGetLoadable(const String & name) const { return loadAndGet(name); }
+    /// Loads a specified object.
+    /// The function does nothing if it's already loaded.
+    /// The function doesn't throw an exception if it's failed to load.
+    template <typename ReturnType = LoadablePtr, typename = std::enable_if_t<is_scalar_load_result_type<ReturnType>, void>>
+    ReturnType tryLoad(const String & name, Duration timeout = WAIT) const;
 
-    /// Tries to finish loading of a specified object during the timeout.
-    /// Throws an exception if the loading is unsuccessful or if there is no such object.
-    void loadStrict(const String & name, LoadablePtr & loaded_object) const;
-    void loadStrict(const String & name) const { LoadablePtr object; loadStrict(name, object); }
-    LoadablePtr getLoadable(const String & name) const { LoadablePtr object; loadStrict(name, object); return object; }
+    /// Loads objects by filter.
+    /// The function does nothing for already loaded objects, it just returns them.
+    /// The function doesn't throw an exception if it's failed to load something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType tryLoad(const FilterByNameFunction & filter, Duration timeout = WAIT) const;
 
-    /// Tries to finish loading of the objects for which the specified function returns true.
-    void load(const FilterByNameFunction & filter_by_name) const { Loadables objects; load(filter_by_name, objects, Duration::zero()); }
-    void load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout = NO_TIMEOUT) const;
-    void load(const FilterByNameFunction & filter_by_name, LoadResults & load_results, Duration timeout = NO_TIMEOUT) const;
-    Loadables loadAndGet(const FilterByNameFunction & filter_by_name, Duration timeout = NO_TIMEOUT) const { Loadables loaded_objects; load(filter_by_name, loaded_objects, timeout); return loaded_objects; }
+    /// Loads all objects.
+    /// The function does nothing for already loaded objects, it just returns them.
+    /// The function doesn't throw an exception if it's failed to load something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType tryLoadAll(Duration timeout = WAIT) const { return tryLoad<ReturnType>(alwaysTrue, timeout); }
 
-    /// Tries to finish loading of all the objects during the timeout.
-    void load(Loadables & loaded_objects, Duration timeout = NO_TIMEOUT) const;
+    /// Loads a specified object.
+    /// The function does nothing if it's already loaded.
+    /// The function throws an exception if it's failed to load.
+    template <typename ReturnType = LoadablePtr, typename = std::enable_if_t<is_scalar_load_result_type<ReturnType>, void>>
+    ReturnType load(const String & name) const;
 
-    /// Starts reloading of a specified object.
-    /// `load_never_loading` specifies what to do if the object has never been loading before.
-    /// The function can either skip it (false) or load for the first time (true).
-    /// Also function can load dictionary synchronously
-    void reload(const String & name, bool load_never_loading = false) const;
+    /// Loads objects by filter.
+    /// The function does nothing for already loaded objects, it just returns them.
+    /// The function throws an exception if it's failed to load something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType load(const FilterByNameFunction & filter) const;
 
-    /// Starts reloading of all the objects.
-    /// `load_never_loading` specifies what to do with the objects which have never been loading before.
-    /// The function can either skip them (false) or load for the first time (true).
-    void reload(bool load_never_loading = false) const;
+    /// Loads all objects. Not recommended to use.
+    /// The function does nothing for already loaded objects, it just returns them.
+    /// The function throws an exception if it's failed to load something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType loadAll() const { return load<ReturnType>(alwaysTrue); }
 
-    /// Starts reloading of all objects matched `filter_by_name`.
-    /// `load_never_loading` specifies what to do with the objects which have never been loading before.
-    /// The function can either skip them (false) or load for the first time (true).
-    void reload(const FilterByNameFunction & filter_by_name, bool load_never_loading = false) const;
+    /// Loads or reloads a specified object.
+    /// The function reloads the object if it's already loaded.
+    /// The function throws an exception if it's failed to load or reload.
+    template <typename ReturnType = LoadablePtr, typename = std::enable_if_t<is_scalar_load_result_type<ReturnType>, void>>
+    ReturnType loadOrReload(const String & name) const;
+
+    /// Loads or reloads objects by filter.
+    /// The function reloads the objects which are already loaded.
+    /// The function throws an exception if it's failed to load or reload something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType loadOrReload(const FilterByNameFunction & filter) const;
+
+    /// Loads or reloads all objects. Not recommended to use.
+    /// The function throws an exception if it's failed to load or reload something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType loadOrReloadAll() const { return loadOrReload<ReturnType>(alwaysTrue); }
+
+    /// Reloads all objects which were tried to load before (successfully or not).
+    /// The function throws an exception if it's failed to load or reload something.
+    template <typename ReturnType = Loadables, typename = std::enable_if_t<is_vector_load_result_type<ReturnType>, void>>
+    ReturnType reloadAllTriedToLoad() const;
 
     /// Reloads all config repositories.
     void reloadConfig() const;
@@ -176,8 +207,13 @@ protected:
     virtual LoadablePtr create(const String & name, const Poco::Util::AbstractConfiguration & config, const String & key_in_config, const String & repository_name) const = 0;
 
 private:
-    struct ObjectConfig;
+    void checkLoaded(const LoadResult & result, bool check_no_errors) const;
+    void checkLoaded(const LoadResults & results, bool check_no_errors) const;
 
+    static bool alwaysTrue(const String &) { return true; }
+    Strings getAllTriedToLoadNames() const;
+
+    struct ObjectConfig;
     LoadablePtr createObject(const String & name, const ObjectConfig & config, const LoadablePtr & previous_version) const;
 
     class LoadablesConfigReader;
@@ -190,6 +226,7 @@ private:
     std::unique_ptr<PeriodicUpdater> periodic_updater;
 
     const String type_name;
+    Poco::Logger * log;
 };
 
 String toString(ExternalLoader::Status status);
diff --git a/dbms/src/Interpreters/ExternalModelsLoader.h b/dbms/src/Interpreters/ExternalModelsLoader.h
index 14a4a94e665..753bad20ca0 100644
--- a/dbms/src/Interpreters/ExternalModelsLoader.h
+++ b/dbms/src/Interpreters/ExternalModelsLoader.h
@@ -22,7 +22,7 @@ public:
 
     ModelPtr getModel(const std::string & name) const
     {
-        return std::static_pointer_cast<const IModel>(getLoadable(name));
+        return std::static_pointer_cast<const IModel>(load(name));
     }
 
     void addConfigRepository(const String & name,
diff --git a/dbms/src/Interpreters/IJoin.h b/dbms/src/Interpreters/IJoin.h
index 5c005dc1b1c..c62f39e9edf 100644
--- a/dbms/src/Interpreters/IJoin.h
+++ b/dbms/src/Interpreters/IJoin.h
@@ -33,6 +33,7 @@ public:
     virtual bool alwaysReturnsEmptySet() const { return false; }
 
     virtual BlockInputStreamPtr createStreamWithNonJoinedRows(const Block &, UInt64) const { return {}; }
+    virtual bool hasStreamWithNonJoinedRows() const { return false; }
 };
 
 using JoinPtr = std::shared_ptr<IJoin>;
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 0ba5c9ecd6c..32f314c5007 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -770,10 +770,18 @@ InterpreterSelectQuery::analyzeExpressions(
             }
         }
 
+        bool has_stream_with_non_joned_rows = (res.before_join && res.before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());
+        res.optimize_read_in_order =
+            context.getSettingsRef().optimize_read_in_order
+            && storage && query.orderBy()
+            && !query_analyzer.hasAggregation()
+            && !query.final()
+            && !has_stream_with_non_joned_rows;
+
         /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
         query_analyzer.appendSelect(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
         res.selected_columns = chain.getLastStep().required_output;
-        res.has_order_by = query_analyzer.appendOrderBy(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
+        res.has_order_by = query_analyzer.appendOrderBy(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage), res.optimize_read_in_order);
         res.before_order_and_select = chain.getLastActions();
         chain.addStep();
 
@@ -943,87 +951,6 @@ static UInt64 getLimitForSorting(const ASTSelectQuery & query, const Context & c
 }
 
 
-static InputSortingInfoPtr optimizeReadInOrder(const MergeTreeData & merge_tree, const ASTSelectQuery & query,
-    const Context & context, const SyntaxAnalyzerResultPtr & global_syntax_result)
-{
-    if (!merge_tree.hasSortingKey())
-        return {};
-
-    auto order_descr = getSortDescription(query, context);
-    SortDescription order_key_prefix_descr;
-    int read_direction = order_descr.at(0).direction;
-
-    const auto & sorting_key_columns = merge_tree.getSortingKeyColumns();
-    size_t prefix_size = std::min(order_descr.size(), sorting_key_columns.size());
-
-    for (size_t i = 0; i < prefix_size; ++i)
-    {
-        if (global_syntax_result->array_join_result_to_source.count(order_descr[i].column_name))
-            break;
-
-        /// Optimize in case of exact match with order key element
-        ///  or in some simple cases when order key element is wrapped into monotonic function.
-        int current_direction = order_descr[i].direction;
-        if (order_descr[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
-            order_key_prefix_descr.push_back(order_descr[i]);
-        else
-        {
-            auto ast = query.orderBy()->children[i]->children.at(0);
-            auto syntax_result = SyntaxAnalyzer(context).analyze(ast, global_syntax_result->required_source_columns);
-            auto actions = ExpressionAnalyzer(ast, syntax_result, context).getActions(true);
-
-            const auto & input_columns = actions->getRequiredColumnsWithTypes();
-            if (input_columns.size() != 1 || input_columns.front().name != sorting_key_columns[i])
-                break;
-
-            bool first = true;
-            for (const auto & action : actions->getActions())
-            {
-                if (action.type != ExpressionAction::APPLY_FUNCTION)
-                    continue;
-
-                if (!first)
-                {
-                    current_direction = 0;
-                    break;
-                }
-                else
-                    first = false;
-
-                const auto & func = *action.function_base;
-                if (!func.hasInformationAboutMonotonicity())
-                {
-                    current_direction = 0;
-                    break;
-                }
-
-                auto monotonicity = func.getMonotonicityForRange(*input_columns.front().type, {}, {});
-                if (!monotonicity.is_monotonic)
-                {
-                    current_direction = 0;
-                    break;
-                }
-                else if (!monotonicity.is_positive)
-                    current_direction *= -1;
-            }
-
-            if (!current_direction || (i > 0 && current_direction != read_direction))
-                break;
-
-            if (i == 0)
-                read_direction = current_direction;
-
-            order_key_prefix_descr.push_back(order_descr[i]);
-        }
-    }
-
-    if (order_key_prefix_descr.empty())
-        return {};
-
-    return std::make_shared<InputSortingInfo>(std::move(order_key_prefix_descr), read_direction);
-}
-
-
 template <typename TPipeline>
 void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputStreamPtr & prepared_input, QueryPipeline & save_context_and_storage)
 {
@@ -1044,13 +971,6 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
     const Settings & settings = context->getSettingsRef();
     auto & expressions = analysis_result;
 
-    InputSortingInfoPtr input_sorting_info;
-    if (settings.optimize_read_in_order && storage && query.orderBy() && !query_analyzer->hasAggregation() && !query.final() && !query.join())
-    {
-        if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
-            input_sorting_info = optimizeReadInOrder(*merge_tree_data, query, *context, syntax_analyzer_result);
-    }
-
     if (options.only_analyze)
     {
         if constexpr (pipeline_with_processors)
@@ -1108,7 +1028,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
             throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
 
         /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */
-        executeFetchColumns(from_stage, pipeline, input_sorting_info, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere, save_context_and_storage);
+        executeFetchColumns(from_stage, pipeline, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere, save_context_and_storage);
 
         LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(options.to_stage));
     }
@@ -1367,7 +1287,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
 template <typename TPipeline>
 void InterpreterSelectQuery::executeFetchColumns(
         QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
-        const InputSortingInfoPtr & input_sorting_info, const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere,
+        const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere,
         QueryPipeline & save_context_and_storage)
 {
     constexpr bool pipeline_with_processors = std::is_same<TPipeline, QueryPipeline>::value;
@@ -1691,7 +1611,19 @@ void InterpreterSelectQuery::executeFetchColumns(
         query_info.syntax_analyzer_result = syntax_analyzer_result;
         query_info.sets = query_analyzer->getPreparedSets();
         query_info.prewhere_info = prewhere_info;
-        query_info.input_sorting_info = input_sorting_info;
+
+        /// Create optimizer with prepared actions.
+        /// Maybe we will need to calc input_sorting_info later, e.g. while reading from StorageMerge.
+        if (analysis_result.optimize_read_in_order)
+        {
+            query_info.order_by_optimizer = std::make_shared<ReadInOrderOptimizer>(
+                query_analyzer->getOrderByActions(),
+                getSortDescription(query, *context),
+                query_info.syntax_analyzer_result);
+
+            query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
+        }
+
 
         BlockInputStreams streams;
         Pipes pipes;
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index 6b95d7aeea7..ca39d488102 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -12,6 +12,7 @@
 #include <Interpreters/SelectQueryOptions.h>
 #include <Storages/SelectQueryInfo.h>
 #include <Storages/TableStructureLockHolder.h>
+#include <Storages/ReadInOrderOptimizer.h>
 
 #include <Processors/QueryPipeline.h>
 #include <Columns/FilterDescription.h>
@@ -152,6 +153,7 @@ private:
         bool has_limit_by   = false;
 
         bool remove_where_filter = false;
+        bool optimize_read_in_order = false;
 
         ExpressionActionsPtr before_join;   /// including JOIN
         ExpressionActionsPtr before_where;
@@ -201,7 +203,7 @@ private:
 
     template <typename TPipeline>
     void executeFetchColumns(QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
-        const InputSortingInfoPtr & sorting_info, const PrewhereInfoPtr & prewhere_info,
+        const PrewhereInfoPtr & prewhere_info,
         const Names & columns_to_remove_after_prewhere,
         QueryPipeline & save_context_and_storage);
 
diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp
index c742ac37a5f..b5ca231c13d 100644
--- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp
@@ -140,6 +140,9 @@ BlockIO InterpreterSystemQuery::execute()
     if (!query.target_table.empty() && query.target_database.empty())
          query.target_database = context.getCurrentDatabase();
 
+    if (!query.target_dictionary.empty() && !query.target_database.empty())
+        query.target_dictionary = query.target_database + "." + query.target_dictionary;
+
     switch (query.type)
     {
         case Type::SHUTDOWN:
@@ -167,11 +170,11 @@ BlockIO InterpreterSystemQuery::execute()
             break;
 #endif
         case Type::RELOAD_DICTIONARY:
-            system_context.getExternalDictionariesLoader().reload(query.target_dictionary, true /* load the dictionary even if it wasn't loading before */);
+            system_context.getExternalDictionariesLoader().loadOrReload(query.target_dictionary);
             break;
         case Type::RELOAD_DICTIONARIES:
             executeCommandsAndThrowIfError(
-                    [&] () { system_context.getExternalDictionariesLoader().reload(); },
+                    [&] () { system_context.getExternalDictionariesLoader().reloadAllTriedToLoad(); },
                     [&] () { system_context.getEmbeddedDictionaries().reload(); }
             );
             break;
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 4da687ac1e4..793b74ff890 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -125,6 +125,7 @@ Join::Join(std::shared_ptr<AnalyzedJoin> table_join_, const Block & right_sample
     , nullable_left_side(table_join->forceNullableLeft())
     , any_take_last_row(any_take_last_row_)
     , asof_inequality(table_join->getAsofInequality())
+    , data(std::make_shared<RightTableData>())
     , log(&Logger::get("Join"))
 {
     setSampleBlock(right_sample_block);
@@ -260,26 +261,26 @@ struct KeyGetterForType
 
 void Join::init(Type type_)
 {
-    type = type_;
+    data->type = type_;
 
     if (kind == ASTTableJoin::Kind::Cross)
         return;
-    joinDispatchInit(kind, strictness, maps);
-    joinDispatch(kind, strictness, maps, [&](auto, auto, auto & map) { map.create(type); });
+    joinDispatchInit(kind, strictness, data->maps);
+    joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { map.create(data->type); });
 }
 
 size_t Join::getTotalRowCount() const
 {
     size_t res = 0;
 
-    if (type == Type::CROSS)
+    if (data->type == Type::CROSS)
     {
-        for (const auto & block : blocks)
+        for (const auto & block : data->blocks)
             res += block.rows();
     }
     else
     {
-        joinDispatch(kind, strictness, maps, [&](auto, auto, auto & map) { res += map.getTotalRowCount(type); });
+        joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalRowCount(data->type); });
     }
 
     return res;
@@ -289,15 +290,15 @@ size_t Join::getTotalByteCount() const
 {
     size_t res = 0;
 
-    if (type == Type::CROSS)
+    if (data->type == Type::CROSS)
     {
-        for (const auto & block : blocks)
+        for (const auto & block : data->blocks)
             res += block.bytes();
     }
     else
     {
-        joinDispatch(kind, strictness, maps, [&](auto, auto, auto & map) { res += map.getTotalByteCountImpl(type); });
-        res += pool.size();
+        joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalByteCountImpl(data->type); });
+        res += data->pool.size();
     }
 
     return res;
@@ -482,6 +483,8 @@ void Join::initRequiredRightKeys()
 
 void Join::initRightBlockStructure()
 {
+    auto & saved_block_sample = data->sample_block;
+
     if (isRightOrFull(kind))
     {
         /// Save keys for NonJoinedBlockInputStream
@@ -504,7 +507,7 @@ void Join::initRightBlockStructure()
 Block Join::structureRightBlock(const Block & block) const
 {
     Block structured_block;
-    for (auto & sample_column : saved_block_sample.getColumnsWithTypeAndName())
+    for (auto & sample_column : savedBlockSample().getColumnsWithTypeAndName())
     {
         ColumnWithTypeAndName column = block.getByName(sample_column.name);
         if (sample_column.column->isNullable())
@@ -543,24 +546,24 @@ bool Join::addJoinedBlock(const Block & source_block)
     size_t total_bytes = 0;
 
     {
-        std::unique_lock lock(rwlock);
+        std::unique_lock lock(data->rwlock);
 
-        blocks.emplace_back(std::move(structured_block));
-        Block * stored_block = &blocks.back();
+        data->blocks.emplace_back(std::move(structured_block));
+        Block * stored_block = &data->blocks.back();
 
         if (rows)
-            has_no_rows_in_maps = false;
+            data->empty = false;
 
         if (kind != ASTTableJoin::Kind::Cross)
         {
-            joinDispatch(kind, strictness, maps, [&](auto, auto strictness_, auto & map)
+            joinDispatch(kind, strictness, data->maps, [&](auto, auto strictness_, auto & map)
             {
-                insertFromBlockImpl<strictness_>(*this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
+                insertFromBlockImpl<strictness_>(*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map, data->pool);
             });
         }
 
         if (save_nullmap)
-            blocks_nullmaps.emplace_back(stored_block, null_map_holder);
+            data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
 
         /// TODO: Do not calculate them every time
         total_rows = getTotalRowCount();
@@ -915,12 +918,12 @@ void Join::joinBlockImpl(
     if constexpr (is_asof_join)
         extras.push_back(right_table_keys.getByName(key_names_right.back()));
 
-    AddedColumns added_columns(sample_block_with_columns_to_add, block_with_columns_to_add, block, saved_block_sample,
+    AddedColumns added_columns(sample_block_with_columns_to_add, block_with_columns_to_add, block, savedBlockSample(),
                                extras, *this, key_columns, key_sizes);
     bool has_required_right_keys = (required_right_keys.columns() != 0);
     added_columns.need_filter = need_filter || has_required_right_keys;
 
-    IColumn::Filter row_filter = switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, type, null_map);
+    IColumn::Filter row_filter = switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map);
 
     for (size_t i = 0; i < added_columns.size(); ++i)
         block.insert(added_columns.moveColumn(i));
@@ -1012,7 +1015,7 @@ void Join::joinBlockImplCross(Block & block) const
 
     for (size_t i = 0; i < rows_left; ++i)
     {
-        for (const Block & block_right : blocks)
+        for (const Block & block_right : data->blocks)
         {
             size_t rows_right = block_right.rows();
 
@@ -1050,7 +1053,7 @@ static void checkTypeOfKey(const Block & block_left, const Block & block_right)
 
 DataTypePtr Join::joinGetReturnType(const String & column_name) const
 {
-    std::shared_lock lock(rwlock);
+    std::shared_lock lock(data->rwlock);
 
     if (!sample_block_with_columns_to_add.has(column_name))
         throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::LOGICAL_ERROR);
@@ -1071,7 +1074,7 @@ void Join::joinGetImpl(Block & block, const String & column_name, const Maps & m
 // TODO: return array of values when strictness == ASTTableJoin::Strictness::All
 void Join::joinGet(Block & block, const String & column_name) const
 {
-    std::shared_lock lock(rwlock);
+    std::shared_lock lock(data->rwlock);
 
     if (key_names_right.size() != 1)
         throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::LOGICAL_ERROR);
@@ -1081,7 +1084,7 @@ void Join::joinGet(Block & block, const String & column_name) const
     if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) &&
         kind == ASTTableJoin::Kind::Left)
     {
-        joinGetImpl(block, column_name, std::get<MapsOne>(maps));
+        joinGetImpl(block, column_name, std::get<MapsOne>(data->maps));
     }
     else
         throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR);
@@ -1090,12 +1093,12 @@ void Join::joinGet(Block & block, const String & column_name) const
 
 void Join::joinBlock(Block & block)
 {
-    std::shared_lock lock(rwlock);
+    std::shared_lock lock(data->rwlock);
 
     const Names & key_names_left = table_join->keyNamesLeft();
     JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right);
 
-    if (joinDispatch(kind, strictness, maps, [&](auto kind_, auto strictness_, auto & map)
+    if (joinDispatch(kind, strictness, data->maps, [&](auto kind_, auto strictness_, auto & map)
         {
             joinBlockImpl<kind_, strictness_>(block, key_names_left, sample_block_with_columns_to_add, map);
         }))
@@ -1172,7 +1175,7 @@ public:
             const String & right_key_name = parent.table_join->keyNamesRight()[i];
 
             size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
-            size_t right_key_pos = parent.saved_block_sample.getPositionByName(right_key_name);
+            size_t right_key_pos = parent.savedBlockSample().getPositionByName(right_key_name);
 
             if (remap_keys && !parent.required_right_keys.has(right_key_name))
                 left_to_right_key_remap[left_key_pos] = right_key_pos;
@@ -1194,9 +1197,10 @@ public:
                 column_indices_left.emplace_back(left_pos);
         }
 
-        for (size_t right_pos = 0; right_pos < parent.saved_block_sample.columns(); ++right_pos)
+        const auto & saved_block_sample = parent.savedBlockSample();
+        for (size_t right_pos = 0; right_pos < saved_block_sample.columns(); ++right_pos)
         {
-            const String & name = parent.saved_block_sample.getByPosition(right_pos).name;
+            const String & name = saved_block_sample.getByPosition(right_pos).name;
             if (!result_sample_block.has(name))
                 continue;
 
@@ -1225,7 +1229,7 @@ public:
 protected:
     Block readImpl() override
     {
-        if (parent.blocks.empty())
+        if (parent.data->blocks.empty())
             return Block();
         return createBlock();
     }
@@ -1262,14 +1266,14 @@ private:
 
     bool hasNullabilityChange(size_t right_pos, size_t result_pos) const
     {
-        const auto & src = parent.saved_block_sample.getByPosition(right_pos).column;
+        const auto & src = parent.savedBlockSample().getByPosition(right_pos).column;
         const auto & dst = result_sample_block.getByPosition(result_pos).column;
         return src->isNullable() != dst->isNullable();
     }
 
     Block createBlock()
     {
-        MutableColumns columns_right = parent.saved_block_sample.cloneEmptyColumns();
+        MutableColumns columns_right = parent.savedBlockSample().cloneEmptyColumns();
 
         size_t rows_added = 0;
 
@@ -1278,7 +1282,7 @@ private:
             rows_added = fillColumnsFromMap<strictness>(map, columns_right);
         };
 
-        if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback))
+        if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps, fill_callback))
             throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
 
         fillNullsFromBlocks(columns_right, rows_added);
@@ -1329,7 +1333,7 @@ private:
     template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
     size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
     {
-        switch (parent.type)
+        switch (parent.data->type)
         {
         #define M(TYPE) \
             case Join::Type::TYPE: \
@@ -1337,7 +1341,7 @@ private:
             APPLY_FOR_JOIN_VARIANTS(M)
         #undef M
             default:
-                throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(parent.type)),
+                throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(parent.data->type)),
                                 ErrorCodes::UNSUPPORTED_JOIN_KEYS);
         }
 
@@ -1380,9 +1384,9 @@ private:
     void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
     {
         if (!nulls_position.has_value())
-            nulls_position = parent.blocks_nullmaps.begin();
+            nulls_position = parent.data->blocks_nullmaps.begin();
 
-        auto end = parent.blocks_nullmaps.end();
+        auto end = parent.data->blocks_nullmaps.end();
 
         for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it)
         {
@@ -1414,4 +1418,14 @@ BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sam
     return {};
 }
 
+
+bool Join::hasStreamWithNonJoinedRows() const
+{
+    /// No such stream for ASOF and SEMI strictness; otherwise the answer is isRightOrFull(kind).
+    if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
+        table_join->strictness() == ASTTableJoin::Strictness::Semi)
+        return false;
+    return isRightOrFull(table_join->kind());
+}
+
 }
diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h
index ff46380db13..c8c9376b926 100644
--- a/dbms/src/Interpreters/Join.h
+++ b/dbms/src/Interpreters/Join.h
@@ -148,7 +148,7 @@ class Join : public IJoin
 public:
     Join(std::shared_ptr<AnalyzedJoin> table_join_, const Block & right_sample_block, bool any_take_last_row_ = false);
 
-    bool empty() { return type == Type::EMPTY; }
+    bool empty() { return data->type == Type::EMPTY; }
 
     /** Add block of data from right hand of JOIN to the map.
       * Returns false, if some limit was exceeded and you should not insert more data.
@@ -179,13 +179,14 @@ public:
       * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
       */
     BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const override;
+    bool hasStreamWithNonJoinedRows() const override;
 
     /// Number of keys in all built JOIN maps.
     size_t getTotalRowCount() const final;
     /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools.
     size_t getTotalByteCount() const;
 
-    bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && has_no_rows_in_maps; }
+    bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && data->empty; }
 
     ASTTableJoin::Kind getKind() const { return kind; }
     ASTTableJoin::Strictness getStrictness() const { return strictness; }
@@ -294,6 +295,30 @@ public:
     using MapsAsof =            MapsTemplate<JoinStuff::MappedAsof>;
 
     using MapsVariant = std::variant<MapsOne, MapsAll, MapsOneFlagged, MapsAllFlagged, MapsAsof>;
+    using BlockNullmapList = std::deque<std::pair<const Block *, ColumnPtr>>;
+
+    struct RightTableData
+    {
+        /// Data built from the right table, kept in one object so that it can be shared between Join objects (see Join::data).
+        /// rwlock: taken exclusively by addJoinedBlock and shared by joinBlock/joinGet; these overlap only when used via StorageJoin.
+        mutable std::shared_mutex rwlock;
+
+        Type type = Type::EMPTY; /// Set by Join::init() and passed to map.create() for the variant held in `maps`.
+        bool empty = true; /// Becomes false once addJoinedBlock stores a block that has rows.
+
+        MapsVariant maps;
+        Block sample_block; /// Block as it would appear in the BlockList
+        BlocksList blocks; /// Blocks of "right" table.
+        BlockNullmapList blocks_nullmaps; /// Nullmaps for blocks of "right" table (if needed)
+
+        /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
+        Arena pool;
+    };
+
+    /// Make this Join use the right table data of `join` instead of its own.
+    /// Only the shared_ptr is assigned, so afterwards both objects refer to the same RightTableData
+    /// (maps, blocks, pool and rwlock included); nothing is copied.
+    void reuseJoinedData(const Join & join) { data = join.data; }
 
 private:
     friend class NonJoinedBlockInputStream;
@@ -306,33 +331,14 @@ private:
     /// Names of key columns in right-side table (in the order they appear in ON/USING clause). @note It could contain duplicates.
     const Names & key_names_right;
 
-    /// In case of LEFT and FULL joins, if use_nulls, convert right-side columns to Nullable.
-    bool nullable_right_side;
-    /// In case of RIGHT and FULL joins, if use_nulls, convert left-side columns to Nullable.
-    bool nullable_left_side;
-
-    /// Overwrite existing values when encountering the same key again
-    bool any_take_last_row;
-
-    /// Blocks of "right" table.
-    BlocksList blocks;
-
-    /// Nullmaps for blocks of "right" table (if needed)
-    using BlockNullmapList = std::deque<std::pair<const Block *, ColumnPtr>>;
-    BlockNullmapList blocks_nullmaps;
-
-    MapsVariant maps;
-    bool has_no_rows_in_maps = true;
-
-    /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
-    Arena pool;
-
-    Type type = Type::EMPTY;
+    bool nullable_right_side; /// In case of LEFT and FULL joins, if use_nulls, convert right-side columns to Nullable.
+    bool nullable_left_side; /// In case of RIGHT and FULL joins, if use_nulls, convert left-side columns to Nullable.
+    bool any_take_last_row; /// Overwrite existing values when encountering the same key again
     std::optional<AsofRowRefs::Type> asof_type;
     ASOF::Inequality asof_inequality;
 
-    static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
-
+    /// Right table data. StorageJoin shares it between many Join objects.
+    std::shared_ptr<RightTableData> data;
     Sizes key_sizes;
 
     /// Block with columns from the right-side table except key columns.
@@ -344,26 +350,18 @@ private:
     /// Left table column names that are sources for required_right_keys columns
     std::vector<String> required_right_keys_sources;
 
-    /// Block as it would appear in the BlockList
-    Block saved_block_sample;
-
     Poco::Logger * log;
 
     Block totals;
 
-    /** Protect state for concurrent use in insertFromBlock and joinBlock.
-      * Note that these methods could be called simultaneously only while use of StorageJoin,
-      *  and StorageJoin only calls these two methods.
-      * That's why another methods are not guarded.
-      */
-    mutable std::shared_mutex rwlock;
-
     void init(Type type_);
 
     /** Set information about structure of right hand of JOIN (joined data).
       */
     void setSampleBlock(const Block & block);
 
+    const Block & savedBlockSample() const { return data->sample_block; } /// Sample block kept in the (possibly shared) right table data.
+
     /// Modify (structure) right block to save it in block list
     Block structureRightBlock(const Block & stored_block) const;
     void initRightBlockStructure();
@@ -380,6 +378,8 @@ private:
 
     template <typename Maps>
     void joinGetImpl(Block & block, const String & column_name, const Maps & maps) const;
+
+    static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
 };
 
 }
diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp
index 54b48e8a471..31470b08468 100644
--- a/dbms/src/Interpreters/MutationsInterpreter.cpp
+++ b/dbms/src/Interpreters/MutationsInterpreter.cpp
@@ -139,7 +139,7 @@ bool isStorageTouchedByMutations(
             return true;
     }
 
-    context_copy.getSettingsRef().merge_tree_uniform_read_distribution = 0;
+    context_copy.getSettingsRef().max_streams_to_max_threads_ratio = 1;
     context_copy.getSettingsRef().max_threads = 1;
 
     ASTPtr select_query = prepareQueryAffectedAST(commands);
diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp
index 4e7525bb176..604404b0bf7 100644
--- a/dbms/src/Parsers/ASTSystemQuery.cpp
+++ b/dbms/src/Parsers/ASTSystemQuery.cpp
@@ -93,20 +93,30 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
     settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM " << (settings.hilite ? hilite_none : "");
     settings.ostr << typeToString(type);
 
-    auto print_database_table = [&] ()
+    auto print_database_table = [&]
     {
         settings.ostr << " ";
-
         if (!target_database.empty())
         {
             settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(target_database)
                           << (settings.hilite ? hilite_none : "") << ".";
         }
-
         settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(target_table)
                       << (settings.hilite ? hilite_none : "");
     };
 
+    auto print_database_dictionary = [&]
+    {
+        /// Prints " [database.]dictionary": the database part only if target_database is set,
+        /// each identifier back-quoted when needed and wrapped in highlighting if settings.hilite.
+        const auto ident_on = settings.hilite ? hilite_identifier : "";
+        const auto ident_off = settings.hilite ? hilite_none : "";
+        settings.ostr << " ";
+        if (!target_database.empty())
+            settings.ostr << ident_on << backQuoteIfNeed(target_database) << ident_off << ".";
+        settings.ostr << ident_on << backQuoteIfNeed(target_dictionary) << ident_off;
+    };
+
     if (   type == Type::STOP_MERGES
         || type == Type::START_MERGES
         || type == Type::STOP_TTL_MERGES
@@ -130,7 +140,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
         print_database_table();
     }
     else if (type == Type::RELOAD_DICTIONARY)
-        settings.ostr << " " << backQuoteIfNeed(target_dictionary);
+        print_database_dictionary();
 }
 
 
diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp
index 0a5bd1bf63e..f75e9216210 100644
--- a/dbms/src/Parsers/ParserSystemQuery.cpp
+++ b/dbms/src/Parsers/ParserSystemQuery.cpp
@@ -1,7 +1,8 @@
 #include <Parsers/ParserSystemQuery.h>
 #include <Parsers/ASTSystemQuery.h>
 #include <Parsers/CommonParsers.h>
-#include <Parsers/parseIdentifierOrStringLiteral.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/ASTLiteral.h>
 #include <Parsers/parseDatabaseAndTableName.h>
 
 
@@ -41,9 +42,14 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
     switch (res->type)
     {
         case Type::RELOAD_DICTIONARY:
-            if (!parseIdentifierOrStringLiteral(pos, expected, res->target_dictionary))
+        {
+            ASTPtr ast;
+            if (ParserStringLiteral{}.parse(pos, ast, expected))
+                res->target_dictionary = ast->as<ASTLiteral &>().value.safeGet<String>();
+            else if (!parseDatabaseAndTableName(pos, expected, res->target_database, res->target_dictionary))
                 return false;
             break;
+        }
 
         case Type::RESTART_REPLICA:
         case Type::SYNC_REPLICA:
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
index 2befe19308a..111d1ff7aab 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -81,7 +81,10 @@ namespace
 
 StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(
     StorageDistributed & storage_, const std::string & name_, const ConnectionPoolPtr & pool_, ActionBlocker & monitor_blocker_)
-    : storage(storage_), pool{pool_}, path{storage.path + name_ + '/'}
+    : storage(storage_)
+    , pool{pool_}
+    , name{name_}
+    , path{storage.path + name + '/'}
     , current_batch_file_path{path + "current_batch.txt"}
     , default_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()}
     , sleep_time{default_sleep_time}
@@ -642,4 +645,11 @@ std::string StorageDistributedDirectoryMonitor::getLoggerName() const
     return storage.table_name + '.' + storage.getName() + ".DirectoryMonitor";
 }
 
+void StorageDistributedDirectoryMonitor::updatePath()
+{
+    std::lock_guard lock{mutex}; /// Both paths are reassigned only while holding the mutex.
+    path = storage.path + name + '/'; /// Same formula as in the constructor, re-evaluated with the current storage.path.
+    current_batch_file_path = path + "current_batch.txt";
+}
+
 }
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h
index be613aec6e0..7e8f6a298f7 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.h
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h
@@ -26,6 +26,8 @@ public:
 
     static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage);
 
+    void updatePath();
+
     void flushAllData();
 
     void shutdownAndDropAllData();
@@ -43,6 +45,7 @@ private:
 
     StorageDistributed & storage;
     ConnectionPoolPtr pool;
+    std::string name;
     std::string path;
 
     bool should_batch_inserts = false;
diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp
index 239eade0354..d732243c370 100644
--- a/dbms/src/Storages/Kafka/StorageKafka.cpp
+++ b/dbms/src/Storages/Kafka/StorageKafka.cpp
@@ -30,6 +30,7 @@
 #include <Common/setThreadName.h>
 #include <Common/typeid_cast.h>
 #include <common/logger_useful.h>
+#include <Common/quoteString.h>
 
 
 namespace DB
@@ -364,7 +365,7 @@ bool StorageKafka::streamToViews()
 {
     auto table = global_context.getTable(database_name, table_name);
     if (!table)
-        throw Exception("Engine table " + database_name + "." + table_name + " doesn't exist.", ErrorCodes::LOGICAL_ERROR);
+        throw Exception("Engine table " + backQuote(database_name) + "." + backQuote(table_name) + " doesn't exist.", ErrorCodes::LOGICAL_ERROR);
 
     // Create an INSERT query for streaming data
     auto insert = std::make_shared<ASTInsertQuery>();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 2c8365fa316..5919e5a2670 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -938,7 +938,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
     auto storage_from_source_part = StorageFromMergeTreeDataPart::create(source_part);
 
     auto context_for_reading = context;
-    context_for_reading.getSettingsRef().merge_tree_uniform_read_distribution = 0;
+    context_for_reading.getSettingsRef().max_streams_to_max_threads_ratio = 1;
     context_for_reading.getSettingsRef().max_threads = 1;
 
     std::vector<MutationCommand> commands_for_part;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 841be3e11d2..36969ed18fe 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -12,6 +12,7 @@
 #include <Storages/MergeTree/MergeTreeIndices.h>
 #include <Storages/MergeTree/MergeTreeIndexReader.h>
 #include <Storages/MergeTree/KeyCondition.h>
+#include <Storages/ReadInOrderOptimizer.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTFunction.h>
@@ -747,9 +748,13 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
         use_uncompressed_cache = false;
 
     Pipes res;
+    if (0 == sum_marks)
+        return res;
 
-    if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1)
+    if (num_streams > 1)
     {
+        /// Parallel query execution.
+
         /// Reduce the number of num_streams if the data is small.
         if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams)
             num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size());
@@ -777,82 +782,22 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
             res.emplace_back(std::move(source));
         }
     }
-    else if (sum_marks > 0)
+    else
     {
-        const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1;
+        /// Sequential query execution.
 
-        for (size_t i = 0; i < num_streams && !parts.empty(); ++i)
+        for (size_t part_index = 0; part_index < parts.size(); ++part_index)
         {
-            size_t need_marks = min_marks_per_stream;
+            RangesInDataPart & part = parts[part_index];
 
-            /// Loop over parts.
-            /// We will iteratively take part or some subrange of a part from the back
-            ///  and assign a stream to read from it.
-            while (need_marks > 0 && !parts.empty())
-            {
-                RangesInDataPart part = parts.back();
-                parts.pop_back();
+            auto source = std::make_shared<MergeTreeSelectProcessor>(
+                data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
+                settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache,
+                query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true,
+                virt_columns, part.part_index_in_query);
 
-                size_t & marks_in_part = sum_marks_in_parts.back();
-
-                /// We will not take too few rows from a part.
-                if (marks_in_part >= min_marks_for_concurrent_read &&
-                    need_marks < min_marks_for_concurrent_read)
-                    need_marks = min_marks_for_concurrent_read;
-
-                /// Do not leave too few rows in the part.
-                if (marks_in_part > need_marks &&
-                    marks_in_part - need_marks < min_marks_for_concurrent_read)
-                    need_marks = marks_in_part;
-
-                MarkRanges ranges_to_get_from_part;
-
-                /// We take the whole part if it is small enough.
-                if (marks_in_part <= need_marks)
-                {
-                    /// Restore the order of segments.
-                    std::reverse(part.ranges.begin(), part.ranges.end());
-
-                    ranges_to_get_from_part = part.ranges;
-
-                    need_marks -= marks_in_part;
-                    sum_marks_in_parts.pop_back();
-                }
-                else
-                {
-                    /// Loop through ranges in part. Take enough ranges to cover "need_marks".
-                    while (need_marks > 0)
-                    {
-                        if (part.ranges.empty())
-                            throw Exception("Unexpected end of ranges while spreading marks among streams", ErrorCodes::LOGICAL_ERROR);
-
-                        MarkRange & range = part.ranges.back();
-
-                        const size_t marks_in_range = range.end - range.begin;
-                        const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks);
-
-                        ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range);
-                        range.begin += marks_to_get_from_range;
-                        marks_in_part -= marks_to_get_from_range;
-                        need_marks -= marks_to_get_from_range;
-                        if (range.begin == range.end)
-                            part.ranges.pop_back();
-                    }
-                    parts.emplace_back(part);
-                }
-
-                auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
-                    data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
-                    settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part,
-                    use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io,
-                    settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query);
-
-                res.emplace_back(std::move(source_processor));
-            }
+            res.emplace_back(std::move(source));
         }
-
-        if (!parts.empty())
-            throw Exception("Couldn't spread marks among streams", ErrorCodes::LOGICAL_ERROR);
     }
 
     return res;
@@ -1102,8 +1047,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
 
     Pipes pipes;
 
-    /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL
-
     for (size_t part_index = 0; part_index < parts.size(); ++part_index)
     {
         RangesInDataPart & part = parts[part_index];
diff --git a/dbms/src/Storages/ReadInOrderOptimizer.cpp b/dbms/src/Storages/ReadInOrderOptimizer.cpp
new file mode 100644
index 00000000000..cceaf9af578
--- /dev/null
+++ b/dbms/src/Storages/ReadInOrderOptimizer.cpp
@@ -0,0 +1,113 @@
+#include <Storages/ReadInOrderOptimizer.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Interpreters/AnalyzedJoin.h>
+#include <Functions/IFunction.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+
+ReadInOrderOptimizer::ReadInOrderOptimizer(
+    const ManyExpressionActions & elements_actions_,
+    const SortDescription & required_sort_description_,
+    const SyntaxAnalyzerResultPtr & syntax_result)
+    : elements_actions(elements_actions_)
+    , required_sort_description(required_sort_description_)
+{
+    if (elements_actions.size() != required_sort_description.size())
+        throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR);
+
+    /// Do not analyze joined columns.
+    /// They may have aliases and come to the description as is.
+    /// We can mismatch them with order key columns at stage of fetching columns.
+    for (const auto & elem : syntax_result->array_join_result_to_source)
+        forbidden_columns.insert(elem.first);
+}
+
+InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const
+{
+    const MergeTreeData * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get());
+    if (!merge_tree || !merge_tree->hasSortingKey())
+        return {};
+
+    SortDescription order_key_prefix_descr;
+    int read_direction = required_sort_description.at(0).direction;
+
+    const auto & sorting_key_columns = merge_tree->getSortingKeyColumns();
+    size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());
+
+    for (size_t i = 0; i < prefix_size; ++i)
+    {
+        if (forbidden_columns.count(required_sort_description[i].column_name))
+            break;
+
+        /// Optimize in case of exact match with order key element
+        ///  or in some simple cases when order key element is wrapped into monotonic function.
+        int current_direction = required_sort_description[i].direction;
+        if (required_sort_description[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
+            order_key_prefix_descr.push_back(required_sort_description[i]);
+        else
+        {
+            /// Allow only one simple monotonic function with one argument
+            bool found_function = false;
+            for (const auto & action : elements_actions[i]->getActions())
+            {
+                if (action.type != ExpressionAction::APPLY_FUNCTION)
+                    continue;
+
+                if (found_function)
+                {
+                    current_direction = 0;
+                    break;
+                }
+                else
+                    found_function = true;
+
+                if (action.argument_names.size() != 1 || action.argument_names.at(0) != sorting_key_columns[i])
+                {
+                    current_direction = 0;
+                    break;
+                }
+
+                const auto & func = *action.function_base;
+                if (!func.hasInformationAboutMonotonicity())
+                {
+                    current_direction = 0;
+                    break;
+                }
+
+                auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
+                if (!monotonicity.is_monotonic)
+                {
+                    current_direction = 0;
+                    break;
+                }
+                else if (!monotonicity.is_positive)
+                    current_direction *= -1;
+            }
+
+            if (!found_function)
+                current_direction = 0;
+
+            if (!current_direction || (i > 0 && current_direction != read_direction))
+                break;
+
+            if (i == 0)
+                read_direction = current_direction;
+
+            order_key_prefix_descr.push_back(required_sort_description[i]);
+        }
+    }
+
+    if (order_key_prefix_descr.empty())
+        return {};
+
+    return std::make_shared<InputSortingInfo>(std::move(order_key_prefix_descr), read_direction);
+}
+
+}
diff --git a/dbms/src/Storages/ReadInOrderOptimizer.h b/dbms/src/Storages/ReadInOrderOptimizer.h
new file mode 100644
index 00000000000..8416d23a912
--- /dev/null
+++ b/dbms/src/Storages/ReadInOrderOptimizer.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <Core/SortDescription.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/SelectQueryInfo.h>
+
+namespace DB
+{
+
+/** Helper class, that can analyze MergeTree order key
+ *   and required sort description to get their
+ *   common prefix, which is needed for
+ *   performing reading in order of PK.
+ */
+class ReadInOrderOptimizer
+{
+public:
+    ReadInOrderOptimizer(
+        const ManyExpressionActions & elements_actions,
+        const SortDescription & required_sort_description,
+        const SyntaxAnalyzerResultPtr & syntax_result);
+
+    InputSortingInfoPtr getInputOrder(const StoragePtr & storage) const;
+
+private:
+    /// Actions for every element of order expression to analyze functions for monotonicity
+    ManyExpressionActions elements_actions;
+    NameSet forbidden_columns;
+    SortDescription required_sort_description;
+};
+
+}
diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h
index 23dcf9e3ddf..11907151575 100644
--- a/dbms/src/Storages/SelectQueryInfo.h
+++ b/dbms/src/Storages/SelectQueryInfo.h
@@ -41,15 +41,25 @@ struct InputSortingInfo
 
     InputSortingInfo(const SortDescription & order_key_prefix_descr_, int direction_)
         : order_key_prefix_descr(order_key_prefix_descr_), direction(direction_) {}
+
+    bool operator ==(const InputSortingInfo & other) const
+    {
+        return order_key_prefix_descr == other.order_key_prefix_descr && direction == other.direction;
+    }
+
+    bool operator !=(const InputSortingInfo & other) const { return !(*this == other); }
 };
 
 using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
 using FilterInfoPtr = std::shared_ptr<FilterInfo>;
-using InputSortingInfoPtr = std::shared_ptr<InputSortingInfo>;
+using InputSortingInfoPtr = std::shared_ptr<const InputSortingInfo>;
 
 struct SyntaxAnalyzerResult;
 using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
 
+class ReadInOrderOptimizer;
+using ReadInOrderOptimizerPtr = std::shared_ptr<const ReadInOrderOptimizer>;
+
 /** Query along with some additional data,
   *  that can be used during query processing
   *  inside storage engines.
@@ -62,7 +72,9 @@ struct SelectQueryInfo
 
     PrewhereInfoPtr prewhere_info;
 
-    InputSortingInfoPtr input_sorting_info;
+    ReadInOrderOptimizerPtr order_by_optimizer;
+    /// We can modify it while reading from storage
+    mutable InputSortingInfoPtr input_sorting_info;
 
     /// Prepared sets are used for indices by storage engine.
     /// Example: x IN (1, 2, 3)
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 15e5c0f9d41..666dba7cec6 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -165,6 +165,9 @@ BlockInputStreams StorageBuffer::read(
 
         if (dst_has_same_structure)
         {
+            if (query_info.order_by_optimizer)
+                query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(destination);
+
             /// The destination table has the same structure of the requested columns and we can simply read blocks from there.
             streams_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
         }
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 152712a72f4..f93f014e079 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -596,6 +596,21 @@ void StorageDistributed::flushClusterNodesAllData()
         it->second.flushAllData();
 }
 
+void StorageDistributed::rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name,
+                                TableStructureWriteLockHolder &)
+{
+    table_name = new_table_name;
+    database_name = new_database_name;
+    if (!path.empty())
+    {
+        Poco::File(path).renameTo(new_path_to_db + escapeForFileName(new_table_name));
+        path = new_path_to_db + escapeForFileName(new_table_name) + '/';
+        std::lock_guard lock(cluster_nodes_mutex);
+        for (auto & node : cluster_nodes_data)
+            node.second.directory_monitor->updatePath();
+    }
+}
+
 
 void registerStorageDistributed(StorageFactory & factory)
 {
diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h
index e2409fe136e..7d4bda94ef3 100644
--- a/dbms/src/Storages/StorageDistributed.h
+++ b/dbms/src/Storages/StorageDistributed.h
@@ -82,11 +82,7 @@ public:
     /// Removes temporary data in local filesystem.
     void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override;
 
-    void rename(const String & /*new_path_to_db*/, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override
-    {
-        table_name = new_table_name;
-        database_name = new_database_name;
-    }
+    void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
 
     /// in the sub-tables, you need to manually add and delete columns
     /// the structure of the sub-table is not checked
diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp
index 12444867b6b..5e1e16cfa6a 100644
--- a/dbms/src/Storages/StorageJoin.cpp
+++ b/dbms/src/Storages/StorageJoin.cpp
@@ -67,11 +67,19 @@ void StorageJoin::truncate(const ASTPtr &, const Context &, TableStructureWriteL
 }
 
 
-void StorageJoin::assertCompatible(ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) const
+HashJoinPtr StorageJoin::getJoin(std::shared_ptr<AnalyzedJoin> analyzed_join) const
 {
-    /// NOTE Could be more loose.
-    if (!(kind == kind_ && strictness == strictness_))
+    if (!(kind == analyzed_join->kind() && strictness == analyzed_join->strictness()))
         throw Exception("Table " + table_name + " has incompatible type of JOIN.", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN);
+
+    /// TODO: check key columns
+
+    /// Some HACK to remove wrong names qualifiers: table.column -> column.
+    analyzed_join->setRightKeys(key_names);
+
+    HashJoinPtr join_clone = std::make_shared<Join>(analyzed_join, getSampleBlock().sortColumns());
+    join_clone->reuseJoinedData(*join);
+    return join_clone;
 }
 
 
@@ -201,7 +209,7 @@ class JoinBlockInputStream : public IBlockInputStream
 {
 public:
     JoinBlockInputStream(const Join & parent_, UInt64 max_block_size_, Block && sample_block_)
-        : parent(parent_), lock(parent.rwlock), max_block_size(max_block_size_), sample_block(std::move(sample_block_))
+        : parent(parent_), lock(parent.data->rwlock), max_block_size(max_block_size_), sample_block(std::move(sample_block_))
     {
         columns.resize(sample_block.columns());
         column_indices.resize(sample_block.columns());
@@ -231,11 +239,11 @@ public:
 protected:
     Block readImpl() override
     {
-        if (parent.blocks.empty())
+        if (parent.data->blocks.empty())
             return Block();
 
         Block block;
-        if (!joinDispatch(parent.kind, parent.strictness, parent.maps,
+        if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps,
                 [&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
             throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR);
         return block;
@@ -278,7 +286,7 @@ private:
 
         size_t rows_added = 0;
 
-        switch (parent.type)
+        switch (parent.data->type)
         {
 #define M(TYPE)                                           \
     case Join::Type::TYPE:                                \
@@ -288,7 +296,7 @@ private:
 #undef M
 
             default:
-                throw Exception("Unsupported JOIN keys in StorageJoin. Type: " + toString(static_cast<UInt32>(parent.type)),
+                throw Exception("Unsupported JOIN keys in StorageJoin. Type: " + toString(static_cast<UInt32>(parent.data->type)),
                                 ErrorCodes::UNSUPPORTED_JOIN_KEYS);
         }
 
diff --git a/dbms/src/Storages/StorageJoin.h b/dbms/src/Storages/StorageJoin.h
index cfafd118768..ab974a07bfa 100644
--- a/dbms/src/Storages/StorageJoin.h
+++ b/dbms/src/Storages/StorageJoin.h
@@ -31,6 +31,7 @@ public:
 
     /// Access the innards.
     HashJoinPtr & getJoin() { return join; }
+    HashJoinPtr getJoin(std::shared_ptr<AnalyzedJoin> analyzed_join) const;
 
     /// Verify that the data structure is suitable for implementing this type of JOIN.
     void assertCompatible(ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) const;
diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp
index f4021284bc8..b94cdba682d 100644
--- a/dbms/src/Storages/StorageMaterializedView.cpp
+++ b/dbms/src/Storages/StorageMaterializedView.cpp
@@ -15,6 +15,7 @@
 #include <DataStreams/IBlockOutputStream.h>
 
 #include <Storages/StorageFactory.h>
+#include <Storages/ReadInOrderOptimizer.h>
 
 #include <Common/typeid_cast.h>
 
@@ -200,6 +201,9 @@ BlockInputStreams StorageMaterializedView::read(
 {
     auto storage = getTargetTable();
     auto lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
+    if (query_info.order_by_optimizer)
+        query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
+
     auto streams = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
     for (auto & stream : streams)
         stream->addTableLock(lock);
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp
index f74c81750a1..f2cfa62a375 100644
--- a/dbms/src/Storages/StorageMerge.cpp
+++ b/dbms/src/Storages/StorageMerge.cpp
@@ -209,6 +209,24 @@ BlockInputStreams StorageMerge::read(
     num_streams *= num_streams_multiplier;
     size_t remaining_streams = num_streams;
 
+    InputSortingInfoPtr input_sorting_info;
+    if (query_info.order_by_optimizer)
+    {
+        for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
+        {
+            auto current_info = query_info.order_by_optimizer->getInputOrder(it->first);
+            if (it == selected_tables.begin())
+                input_sorting_info = current_info;
+            else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
+                input_sorting_info.reset();
+
+            if (!input_sorting_info)
+                break;
+        }
+
+        query_info.input_sorting_info = input_sorting_info;
+    }
+
     for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
     {
         size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count);
diff --git a/dbms/src/Storages/StorageXDBC.cpp b/dbms/src/Storages/StorageXDBC.cpp
index bab751e4f36..222eebd6377 100644
--- a/dbms/src/Storages/StorageXDBC.cpp
+++ b/dbms/src/Storages/StorageXDBC.cpp
@@ -1,12 +1,12 @@
+#include "StorageXDBC.h"
 #include <Interpreters/Context.h>
 #include <Interpreters/evaluateConstantExpression.h>
 #include <Parsers/ASTLiteral.h>
 #include <Storages/StorageFactory.h>
-#include <Storages/StorageXDBC.h>
 #include <Storages/transformQueryForExternalDatabase.h>
 #include <Poco/Util/AbstractConfiguration.h>
 #include <common/logger_useful.h>
-
+#include <Formats/FormatFactory.h>
 #include <IO/CompressionMethod.h>
 #include <IO/ReadHelpers.h>
 #include <IO/ReadWriteBufferFromHTTP.h>
@@ -15,6 +15,7 @@
 #include <Poco/Path.h>
 #include <Common/ShellCommand.h>
 #include <ext/range.h>
+
 namespace DB
 {
 namespace ErrorCodes
diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
index 12c882dee9d..c8e19fed086 100644
--- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp
+++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
@@ -48,19 +48,19 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes()
 void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & /*query_info*/) const
 {
     const auto & external_dictionaries = context.getExternalDictionariesLoader();
-    for (const auto & [dict_name, load_result] : external_dictionaries.getCurrentLoadResults())
+    for (const auto & load_result : external_dictionaries.getCurrentLoadResults())
     {
         if (startsWith(load_result.repository_name, IExternalLoaderConfigRepository::INTERNAL_REPOSITORY_NAME_PREFIX))
             continue;
 
         size_t i = 0;
         String database;
-        String short_name = dict_name;
+        String short_name = load_result.name;
 
-        if (!load_result.repository_name.empty() && startsWith(dict_name, load_result.repository_name + "."))
+        if (!load_result.repository_name.empty() && startsWith(load_result.name, load_result.repository_name + "."))
         {
             database = load_result.repository_name;
-            short_name = dict_name.substr(load_result.repository_name.length() + 1);
+            short_name = load_result.name.substr(load_result.repository_name.length() + 1);
         }
 
         res_columns[i++]->insert(database);
diff --git a/dbms/src/Storages/System/StorageSystemModels.cpp b/dbms/src/Storages/System/StorageSystemModels.cpp
index 325dd9ebbd0..67594b8692d 100644
--- a/dbms/src/Storages/System/StorageSystemModels.cpp
+++ b/dbms/src/Storages/System/StorageSystemModels.cpp
@@ -30,9 +30,9 @@ void StorageSystemModels::fillData(MutableColumns & res_columns, const Context &
     const auto & external_models_loader = context.getExternalModelsLoader();
     auto load_results = external_models_loader.getCurrentLoadResults();
 
-    for (const auto & [model_name, load_result] : load_results)
+    for (const auto & load_result : load_results)
     {
-        res_columns[0]->insert(model_name);
+        res_columns[0]->insert(load_result.name);
         res_columns[1]->insert(static_cast<Int8>(load_result.status));
         res_columns[2]->insert(load_result.origin);
 
diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp
index 0d0ed080d8f..c3167dd1c20 100644
--- a/dbms/src/Storages/registerStorages.cpp
+++ b/dbms/src/Storages/registerStorages.cpp
@@ -38,12 +38,9 @@ void registerStorages()
     registerStorageHDFS(factory);
     #endif
 
-    #if USE_POCO_SQLODBC || USE_POCO_DATAODBC
     registerStorageODBC(factory);
-    #endif
     registerStorageJDBC(factory);
 
-
     #if USE_MYSQL
     registerStorageMySQL(factory);
     #endif
diff --git a/dbms/src/Storages/registerStorages.h b/dbms/src/Storages/registerStorages.h
index 522289b2715..b88b2666a8f 100644
--- a/dbms/src/Storages/registerStorages.h
+++ b/dbms/src/Storages/registerStorages.h
@@ -31,10 +31,7 @@ void registerStorageS3(StorageFactory & factory);
 void registerStorageHDFS(StorageFactory & factory);
 #endif
 
-#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
 void registerStorageODBC(StorageFactory & factory);
-#endif
-
 void registerStorageJDBC(StorageFactory & factory);
 
 #if USE_MYSQL
diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp
index d6987a331e0..35021cd46d0 100644
--- a/dbms/src/TableFunctions/registerTableFunctions.cpp
+++ b/dbms/src/TableFunctions/registerTableFunctions.cpp
@@ -24,9 +24,7 @@ void registerTableFunctions()
     registerTableFunctionHDFS(factory);
 #endif
 
-#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
     registerTableFunctionODBC(factory);
-#endif
     registerTableFunctionJDBC(factory);
 
 #if USE_MYSQL
diff --git a/dbms/src/TableFunctions/registerTableFunctions.h b/dbms/src/TableFunctions/registerTableFunctions.h
index 78b6a6917db..66f2dda90ea 100644
--- a/dbms/src/TableFunctions/registerTableFunctions.h
+++ b/dbms/src/TableFunctions/registerTableFunctions.h
@@ -21,10 +21,7 @@ void registerTableFunctionS3(TableFunctionFactory & factory);
 void registerTableFunctionHDFS(TableFunctionFactory & factory);
 #endif
 
-#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
 void registerTableFunctionODBC(TableFunctionFactory & factory);
-#endif
-
 void registerTableFunctionJDBC(TableFunctionFactory & factory);
 
 #if USE_MYSQL
diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py
index 1874440705b..f119ca5d94b 100644
--- a/dbms/tests/integration/helpers/cluster.py
+++ b/dbms/tests/integration/helpers/cluster.py
@@ -534,6 +534,8 @@ services:
             {app_net}
                 {ipv4_address}
                 {ipv6_address}
+                {net_aliases}
+                    {net_alias1}
 '''
 
 
@@ -900,17 +902,17 @@ class ClickHouseInstance:
         if self.stay_alive:
             entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND
 
-        ipv4_address = ipv6_address = ""
-        if self.ipv4_address is None and self.ipv6_address is None:
-            networks = ""
-            app_net = ""
-        else:
+        networks = app_net = ipv4_address = ipv6_address = net_aliases = net_alias1 = ""
+        if self.ipv4_address is not None or self.ipv6_address is not None or self.hostname != self.name:
             networks = "networks:"
             app_net = "default:"
             if self.ipv4_address is not None:
                 ipv4_address = "ipv4_address: " + self.ipv4_address
             if self.ipv6_address is not None:
                 ipv6_address = "ipv6_address: " + self.ipv6_address
+            if self.hostname != self.name:
+                net_aliases = "aliases:"
+                net_alias1 = "- " + self.hostname
 
         if not self.with_installed_binary:
             binary_volume = "- " + self.server_bin_path + ":/usr/bin/clickhouse"
@@ -940,6 +942,8 @@ class ClickHouseInstance:
                 app_net=app_net,
                 ipv4_address=ipv4_address,
                 ipv6_address=ipv6_address,
+                net_aliases = net_aliases,
+                net_alias1 = net_alias1,
             ))
 
     def destroy_dir(self):
diff --git a/dbms/tests/integration/test_allowed_client_hosts/__init__.py b/dbms/tests/integration/test_allowed_client_hosts/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_allowed_client_hosts/configs/users.d/network.xml b/dbms/tests/integration/test_allowed_client_hosts/configs/users.d/network.xml
new file mode 100644
index 00000000000..cb4f22d2657
--- /dev/null
+++ b/dbms/tests/integration/test_allowed_client_hosts/configs/users.d/network.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<yandex>
+    <users>
+        <default>
+            <!-- List of networks with open access.
+
+                 To open access from everywhere, specify:
+                    <ip>::/0</ip>
+
+                 To open access only from localhost, specify:
+                    <ip>::1</ip>
+                    <ip>127.0.0.1</ip>
+
+                 Each element of list has one of the following forms:
+                 <ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
+                     2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
+                 <host> Hostname. Example: server01.yandex.ru.
+                     To check access, DNS query is performed, and all received addresses compared to peer address.
+                 <host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
+                     To check access, DNS PTR query is performed for peer address and then regexp is applied.
+                     Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
+                     It is strongly recommended that the regexp ends with $
+                 All results of DNS requests are cached till server restart.
+            -->
+            <networks>
+                <ip>127.0.0.1</ip>
+                <host>clientA1.com</host>
+                <host>clientA3.com</host>
+                <host_regexp>clientB\d+\.ru</host_regexp>
+                <host_regexp>clientC\d+\.ru$</host_regexp>
+                <host_regexp>^clientD\d+\.ru$</host_regexp>
+            </networks>
+        </default>
+    </users>
+</yandex>
diff --git a/dbms/tests/integration/test_allowed_client_hosts/configs/users.xml b/dbms/tests/integration/test_allowed_client_hosts/configs/users.xml
new file mode 100644
index 00000000000..3142ec5355a
--- /dev/null
+++ b/dbms/tests/integration/test_allowed_client_hosts/configs/users.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<yandex>
+    <profiles>
+        <default>
+        </default>
+    </profiles>
+    <users>
+        <default>
+            <profile>default</profile>
+            <password></password>
+        </default>
+    </users>
+</yandex>
diff --git a/dbms/tests/integration/test_allowed_client_hosts/test.py b/dbms/tests/integration/test_allowed_client_hosts/test.py
new file mode 100644
index 00000000000..fcdf408c88a
--- /dev/null
+++ b/dbms/tests/integration/test_allowed_client_hosts/test.py
@@ -0,0 +1,60 @@
+import os
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+
+cluster = ClickHouseCluster(__file__)
+server = cluster.add_instance('server', config_dir="configs")
+
+clientA1 = cluster.add_instance('clientA1', hostname = 'clientA1.com')
+clientA2 = cluster.add_instance('clientA2', hostname = 'clientA2.com')
+clientA3 = cluster.add_instance('clientA3', hostname = 'clientA3.com')
+clientB1 = cluster.add_instance('clientB1', hostname = 'clientB001.ru')
+clientB2 = cluster.add_instance('clientB2', hostname = 'clientB002.ru')
+clientB3 = cluster.add_instance('clientB3', hostname = 'xxx.clientB003.rutracker.com')
+clientC1 = cluster.add_instance('clientC1', hostname = 'clientC01.ru')
+clientC2 = cluster.add_instance('clientC2', hostname = 'xxx.clientC02.ru')
+clientC3 = cluster.add_instance('clientC3', hostname = 'xxx.clientC03.rutracker.com')
+clientD1 = cluster.add_instance('clientD1', hostname = 'clientD0001.ru')
+clientD2 = cluster.add_instance('clientD2', hostname = 'xxx.clientD0002.ru')
+clientD3 = cluster.add_instance('clientD3', hostname = 'clientD0003.ru')
+
+
+def query_from_one_node_to_another(client_node, server_node, query):
+    return client_node.exec_in_container(["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)])
+
+
+def query(node, query):
+    return query_from_one_node_to_another(node, node, query)
+
+
+@pytest.fixture(scope="module", autouse=True)
+def setup_nodes():
+    try:
+        cluster.start()
+        query(server, "CREATE TABLE test_table (x Int32) ENGINE = MergeTree() ORDER BY tuple()")
+        query(server, "INSERT INTO test_table VALUES (5)")
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_allowed_host():
+    """Clients allowed by <host> in users.d/network.xml can query the server; clientA2 (not listed) is rejected."""
+    expected_to_pass = [clientA1, clientA3]
+    expected_to_fail = [clientA2]
+    # Reverse DNS lookup currently isn't working as expected in this test.
+    # For example, it gives something like "vitbartestallowedclienthosts_clientB1_1.vitbartestallowedclienthosts_default" instead of "clientB001.ru".
+    # Maybe we should set up the test network better.
+    #expected_to_pass.extend([clientB1, clientB2, clientB3, clientC1, clientC2, clientD1, clientD3])
+    #expected_to_fail.extend([clientC3, clientD2])
+
+    for client_node in expected_to_pass:
+        assert query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table") == "5\n"
+
+    for client_node in expected_to_fail:
+        with pytest.raises(Exception) as e:
+            query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table")
+        assert "User default is not allowed to connect from address" in str(e.value)  # e is an ExceptionInfo; the exception itself is e.value
diff --git a/dbms/tests/integration/test_dictionaries_update_and_reload/test.py b/dbms/tests/integration/test_dictionaries_update_and_reload/test.py
index b972dc6c918..434ebc7d505 100644
--- a/dbms/tests/integration/test_dictionaries_update_and_reload/test.py
+++ b/dbms/tests/integration/test_dictionaries_update_and_reload/test.py
@@ -2,6 +2,7 @@ import pytest
 import os
 import time
 from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryTimeoutExceedException
 from helpers.test_tools import assert_eq_with_retry
 
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@@ -53,8 +54,9 @@ def test_reload_while_loading(started_cluster):
     assert get_status('slow') == "NOT_LOADED"
     assert get_loading_duration('slow') == 0
 
-    # It's not possible to get a value from the dictionary within 1.0 second, so the following query fails by timeout.
-    assert query("SELECT dictGetInt32('slow', 'a', toUInt64(5))", timeout = 1, ignore_error = True) == ""
+    # It's not possible to get a value from the dictionary within 0.5 second, so the following query fails by timeout.
+    with pytest.raises(QueryTimeoutExceedException):
+        query("SELECT dictGetInt32('slow', 'a', toUInt64(5))", timeout = 0.5)
 
     # The dictionary is now loading.
     assert get_status('slow') == "LOADING"
@@ -69,7 +71,8 @@ def test_reload_while_loading(started_cluster):
     assert duration >= prev_duration
 
     # SYSTEM RELOAD DICTIONARY should restart loading.
-    query("SYSTEM RELOAD DICTIONARY 'slow'")
+    with pytest.raises(QueryTimeoutExceedException):
+        query("SYSTEM RELOAD DICTIONARY 'slow'", timeout = 0.5)
     assert get_status('slow') == "LOADING"
     prev_start_time, prev_duration = start_time, duration
     start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
@@ -83,15 +86,7 @@ def test_reload_while_loading(started_cluster):
     assert start_time == prev_start_time
     assert duration >= prev_duration
 
-    # SYSTEM RELOAD DICTIONARIES should restart loading again.
-    query("SYSTEM RELOAD DICTIONARIES")
-    assert get_status('slow') == "LOADING"
-    prev_start_time, prev_duration = start_time, duration
-    start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
-    assert start_time > prev_start_time
-    assert duration < prev_duration
-
-    # Changing the configuration file should restart loading one more time.
+    # Changing the configuration file should restart loading again.
     replace_in_file_in_container('/etc/clickhouse-server/config.d/slow.xml', 'sleep 100', 'sleep 0')
     time.sleep(5) # Configuration files are reloaded once in 5 seconds.
 
@@ -141,13 +136,13 @@ def test_reload_after_fail_by_system_reload(started_cluster):
     assert get_status("no_file") == "NOT_LOADED"
 
     # We expect an error because the file source doesn't exist.
-    expected_error = "No such file"
-    assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))")
+    no_such_file_error = "No such file"
+    assert no_such_file_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))")
     assert get_status("no_file") == "FAILED"
 
     # SYSTEM RELOAD should not change anything now, the status is still FAILED.
-    query("SYSTEM RELOAD DICTIONARY 'no_file'")
-    assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))")
+    assert no_such_file_error in instance.query_and_get_error("SYSTEM RELOAD DICTIONARY 'no_file'")
+    assert no_such_file_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))")
     assert get_status("no_file") == "FAILED"
 
     # Creating the file source makes the dictionary able to load.
@@ -158,7 +153,7 @@ def test_reload_after_fail_by_system_reload(started_cluster):
 
     # Removing the file source should not spoil the loaded dictionary.
     instance.exec_in_container("rm /etc/clickhouse-server/config.d/no_file.txt")
-    query("SYSTEM RELOAD DICTIONARY 'no_file'")
+    assert no_such_file_error in instance.query_and_get_error("SYSTEM RELOAD DICTIONARY 'no_file'")
     query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n"
     assert get_status("no_file") == "LOADED"
 
diff --git a/dbms/tests/integration/test_distributed_ddl/cluster.py b/dbms/tests/integration/test_distributed_ddl/cluster.py
index fed672d2274..28071381586 100644
--- a/dbms/tests/integration/test_distributed_ddl/cluster.py
+++ b/dbms/tests/integration/test_distributed_ddl/cluster.py
@@ -106,4 +106,4 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster):
                 if not (s.find('Unknown status, client must retry') >= 0 or s.find('zkutil::KeeperException')):
                     raise e
 
-        raise last_exception
\ No newline at end of file
+        raise last_exception
diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py
index 6e57ed1b2df..e30880e6ea4 100755
--- a/dbms/tests/integration/test_distributed_ddl/test.py
+++ b/dbms/tests/integration/test_distributed_ddl/test.py
@@ -245,6 +245,51 @@ def test_create_reserved(test_cluster):
     test_cluster.ddl_check_query(instance, "DROP TABLE IF EXISTS test_as_reserved ON CLUSTER cluster")
 
 
+def test_rename(test_cluster):
+    instance = test_cluster.instances['ch1']
+    rules = test_cluster.pm_random_drops.pop_rules()
+    test_cluster.ddl_check_query(instance, "CREATE TABLE rename_shard ON CLUSTER cluster (id Int64, sid String DEFAULT concat('old', toString(id))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/staging/test_shard', '{replica}') ORDER BY (id)")
+    test_cluster.ddl_check_query(instance, "CREATE TABLE rename_new ON CLUSTER cluster AS rename_shard ENGINE = Distributed(cluster, default, rename_shard, id % 2)")
+    test_cluster.ddl_check_query(instance, "RENAME TABLE rename_new TO rename ON CLUSTER cluster;")
+
+
+    for i in range(10):
+        instance.query("insert into rename (id) values ({})".format(i))
+
+    # FIXME ddl_check_query doesn't work for replicated DDL if replace_hostnames_with_ips=True,
+    # because replicas use the wrong host name of the leader (and the wrong path in zk) to check whether it has executed the query,
+    # so the DDL query will always fail on some replicas even if the query was actually executed by the leader.
+    # Also, such an inconsistency in the cluster configuration may lead to query duplication if the leader suddenly changes,
+    # because the path of the lock in zk contains the shard name, which is the list of host names of the replicas.
+    instance.query("ALTER TABLE rename_shard ON CLUSTER cluster MODIFY COLUMN sid String DEFAULT concat('new', toString(id))", ignore_error=True)
+    time.sleep(1)
+
+    test_cluster.ddl_check_query(instance, "CREATE TABLE rename_new ON CLUSTER cluster AS rename_shard ENGINE = Distributed(cluster, default, rename_shard, id % 2)")
+
+    instance.query("system stop distributed sends rename")
+
+    for i in range(10, 20):
+        instance.query("insert into rename (id) values ({})".format(i))
+
+    test_cluster.ddl_check_query(instance, "RENAME TABLE rename TO rename_old, rename_new TO rename ON CLUSTER cluster")
+
+    for i in range(20, 30):
+        instance.query("insert into rename (id) values ({})".format(i))
+
+    instance.query("system flush distributed rename")
+    for name in ['ch1', 'ch2', 'ch3', 'ch4']:
+        test_cluster.instances[name].query("system sync replica rename_shard")
+
+    # SYSTEM STOP DISTRIBUTED SENDS does not affect inserts into the local shard,
+    # so some ids in the range (10, 20) will still be inserted into rename_shard.
+    assert instance.query("select count(id), sum(id) from rename").rstrip() == "25\t360"
+    #assert instance.query("select count(id), sum(id) from rename").rstrip() == "20\t290"
+    assert instance.query("select count(id), sum(id) from rename where sid like 'old%'").rstrip() == "15\t115"
+    #assert instance.query("select count(id), sum(id) from rename where sid like 'old%'").rstrip() == "10\t45"
+    assert instance.query("select count(id), sum(id) from rename where sid like 'new%'").rstrip() == "10\t245"
+    test_cluster.pm_random_drops.push_rules(rules)
+
+
 if __name__ == '__main__':
     with contextmanager(test_cluster)() as ctx_cluster:
        for name, instance in ctx_cluster.instances.items():
diff --git a/dbms/tests/integration/test_prometheus_endpoint/test.py b/dbms/tests/integration/test_prometheus_endpoint/test.py
index 10f49c23072..dcd31621cb5 100644
--- a/dbms/tests/integration/test_prometheus_endpoint/test.py
+++ b/dbms/tests/integration/test_prometheus_endpoint/test.py
@@ -56,12 +56,12 @@ def get_and_check_metrics():
 def test_prometheus_endpoint(start_cluster):
 
     metrics_dict = get_and_check_metrics()
-    assert metrics_dict['ClickHouseProfileEventsQuery'] >= 0
-    prev_query_count = metrics_dict['ClickHouseProfileEventsQuery']
+    assert metrics_dict['ClickHouseProfileEvents_Query'] >= 0
+    prev_query_count = metrics_dict['ClickHouseProfileEvents_Query']
 
     resp = node.query("SELECT 1")
     resp = node.query("SELECT 2")
     resp = node.query("SELECT 3")
 
     metrics_dict = get_and_check_metrics()
-    assert metrics_dict['ClickHouseProfileEventsQuery'] >= prev_query_count + 3
+    assert metrics_dict['ClickHouseProfileEvents_Query'] >= prev_query_count + 3
diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference
index bc1fda84ed9..e03a7300db0 100644
--- a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference
+++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference
@@ -13,15 +13,19 @@
 70
 2019-01-01	50	[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50]
 2019-01-02	60	[11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70]
+2019-01-03	10	[1,2,3,4,5,6,7,8,9,10]
 60	50	70	40	20	30
 60	50	70	40	20	30
-90
-90
+100
+100
 20
-90
+100
 20
+[1,2,3]
+[1,2,3]
 2019-01-01	50
 2019-01-02	60
+2019-01-03	10
 1
 1
 1
diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql
index 241a827fb9b..1217fbefc71 100644
--- a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql
+++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql
@@ -15,6 +15,7 @@ DROP TABLE IF EXISTS bitmap_test;
 CREATE TABLE bitmap_test(pickup_date Date, city_id UInt32, uid UInt32)ENGINE = Memory;
 INSERT INTO bitmap_test SELECT '2019-01-01', 1, number FROM numbers(1,50);
 INSERT INTO bitmap_test SELECT '2019-01-02', 1, number FROM numbers(11,60);
+INSERT INTO bitmap_test SELECT '2019-01-03', 2, number FROM numbers(1,10);
 
 
 SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
@@ -65,6 +66,9 @@ SELECT count(*) FROM bitmap_test WHERE bitmapContains((SELECT groupBitmapState(u
 
 SELECT count(*) FROM bitmap_test WHERE 0 = bitmapContains((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), uid);
 
+-- PR#8082
+SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id;
+
 -- bitmap state test
 DROP TABLE IF EXISTS bitmap_state_test;
 CREATE TABLE bitmap_state_test
diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql
index 55d280cf045..45cc0e7eaf7 100644
--- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql
+++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql
@@ -66,8 +66,6 @@ SELECT '==DROP DICTIONARY';
 
 DROP DICTIONARY IF EXISTS ordinary_db.dict1;
 
-SYSTEM RELOAD DICTIONARY 'ordinary_db.dict1'; -- due to lazy_load at can persist for some time
-
 SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1';
 
 EXISTS DICTIONARY ordinary_db.dict1;
diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql
index cb63ce86ada..9b8edab4d41 100644
--- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql
+++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql
@@ -41,8 +41,6 @@ SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', t
 
 DETACH DICTIONARY database_for_dict.dict1;
 
-SYSTEM RELOAD DICTIONARY 'database_for_dict.dict1';
-
 SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36}
 
 ATTACH DICTIONARY database_for_dict.dict1;
@@ -51,8 +49,6 @@ SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11));
 
 DROP DICTIONARY database_for_dict.dict1;
 
-SYSTEM RELOAD DICTIONARY 'database_for_dict.dict1';
-
 SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36}
 
 CREATE DICTIONARY database_for_dict.dict1
@@ -111,7 +107,7 @@ SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12));
 
 DROP TABLE database_for_dict.table_for_dict;
 
-SYSTEM RELOAD DICTIONARIES;
+SYSTEM RELOAD DICTIONARIES; -- {serverError 60}
 
 SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12));
 
diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference
index c4000b670ee..e7190712871 100644
--- a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference
+++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference
@@ -3,7 +3,6 @@
 1
 SYSTEM RELOAD DICTIONARY
 0
-0
 10
 1
 CREATE DATABASE
diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql
index 785e8b4b25a..8fbe68e70e0 100644
--- a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql
+++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql
@@ -17,8 +17,7 @@ SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0));
 SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict';
 
 SELECT 'SYSTEM RELOAD DICTIONARY';
-SYSTEM RELOAD DICTIONARY 'dict_db_01036.dict';
-SELECT sleep(0.3);
+SYSTEM RELOAD DICTIONARY dict_db_01036.dict;
 SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict';
 SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0));
 SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict';
diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference
index c4000b670ee..e7190712871 100644
--- a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference
+++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference
@@ -3,7 +3,6 @@
 1
 SYSTEM RELOAD DICTIONARY
 0
-0
 10
 1
 CREATE DATABASE
diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql
index 46ebdcbd7b3..7f407daff14 100644
--- a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql
+++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql
@@ -17,8 +17,7 @@ SELECT dictGetUInt64('foo 1234.dict', 'val', toUInt64(0));
 SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict';
 
 SELECT 'SYSTEM RELOAD DICTIONARY';
-SYSTEM RELOAD DICTIONARY 'foo 1234.dict';
-SELECT sleep(0.3);
+SYSTEM RELOAD DICTIONARY `foo 1234`.dict;
 SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict';
 SELECT dictGetUInt64('foo 1234.dict', 'val', toUInt64(0));
 SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict';
diff --git a/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.reference b/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.reference
new file mode 100644
index 00000000000..a1e5c714e35
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.reference
@@ -0,0 +1,41 @@
+---StorageMerge---
+0
+0
+0
+0
+0
+0	0
+0	10000
+0	1000000
+0	100000000
+0	1001089600
+0	1002355600
+0	1003622400
+0	100400400
+0	1004890000
+0	1006158400
+OK
+---StorageBuffer---
+1	0
+1	1000000
+1	1000000000
+1	1000000000000
+1	100026577288000
+1	100155921984000
+1	10021812416000
+1	100285378136000
+1	100414945792000
+1	10049728312000
+OK
+---MaterializedView---
+0	0
+0	10000
+0	1000000
+0	100000000
+14	1000267129
+28	1000709956
+0	1001089600
+14	100140049
+14	1001532609
+28	1001975716
+OK
diff --git a/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh b/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh
new file mode 100755
index 00000000000..0898fec802c
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. $CURDIR/../shell_config.sh
+
+set -e
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS s1"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS s2"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS m"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS buf"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mv"
+ 
+$CLICKHOUSE_CLIENT -q "CREATE TABLE s1 (a UInt32, s String) ENGINE = MergeTree ORDER BY a PARTITION BY a % 3"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE s2 (a UInt32, s String) ENGINE = MergeTree ORDER BY a PARTITION BY a % 3"
+
+$CLICKHOUSE_CLIENT -q "CREATE TABLE m (a UInt32, s String) engine = Merge(currentDatabase(), 's[1,2]')"
+$CLICKHOUSE_CLIENT -q "INSERT INTO s1 select (number % 20) * 2 as n, toString(number * number) from numbers(100000)"
+$CLICKHOUSE_CLIENT -q "INSERT INTO s2 select (number % 20) * 2 + 1 as n, toString(number * number * number) from numbers(100000)"
+
+$CLICKHOUSE_CLIENT -q "SELECT '---StorageMerge---'"
+$CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 5"
+$CLICKHOUSE_CLIENT -q "SELECT a, s FROM m ORDER BY a, s LIMIT 10"
+
+# Not a single .sql test with max_rows_to_read because it doesn't work with Merge storage
+rows_read=`$CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g'`
+
+# Expected number of read rows, with a small margin
+if [[ $rows_read -lt 500 ]]
+    then echo "OK"
+else
+    echo "FAIL"
+fi
+
+$CLICKHOUSE_CLIENT -q "SELECT '---StorageBuffer---'"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE buf (a UInt32, s String) engine = Buffer(currentDatabase(), s2, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
+$CLICKHOUSE_CLIENT -q "SELECT a, s FROM buf ORDER BY a, s LIMIT 10"
+rows_read=`$CLICKHOUSE_CLIENT -q "SELECT a FROM buf ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g'`
+
+# Expected number of read rows, with a small margin
+if [[ $rows_read -lt 500 ]]
+    then echo "OK"
+else
+    echo "FAIL"
+fi
+
+$CLICKHOUSE_CLIENT -q "SELECT '---MaterializedView---'"
+$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW mv (a UInt32, s String) engine = MergeTree ORDER BY s POPULATE AS SELECT a, s FROM s1 WHERE a % 7 = 0"
+$CLICKHOUSE_CLIENT -q "SELECT a, s FROM mv ORDER BY s LIMIT 10"
+rows_read=`$CLICKHOUSE_CLIENT -q "SELECT a, s FROM mv ORDER BY s LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g'`
+
+if [[ $rows_read -lt 500 ]]
+    then echo "OK"
+else
+    echo "FAIL"
+fi
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS s1"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS s2"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS m"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS buf"
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mv"
\ No newline at end of file
diff --git a/dbms/tests/queries/0_stateless/01050_engine_join_crash.reference b/dbms/tests/queries/0_stateless/01050_engine_join_crash.reference
new file mode 100644
index 00000000000..f1a4d615cc0
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01050_engine_join_crash.reference
@@ -0,0 +1,12 @@
+1	1
+2	2
+3	3
+1	1
+2	2
+3	3
+3	3
+2	2
+1	1
+-
+1	52.5	ONE
+-
diff --git a/dbms/tests/queries/0_stateless/01050_engine_join_crash.sql b/dbms/tests/queries/0_stateless/01050_engine_join_crash.sql
new file mode 100644
index 00000000000..e95ab3e5022
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01050_engine_join_crash.sql
@@ -0,0 +1,44 @@
+DROP TABLE IF EXISTS testJoinTable;
+
+CREATE TABLE testJoinTable (number UInt64, data String) ENGINE = Join(ANY, INNER, number);
+
+INSERT INTO testJoinTable VALUES (1, '1'), (2, '2'), (3, '3');
+
+SELECT * FROM (SELECT * FROM numbers(10)) INNER JOIN testJoinTable USING number;
+SELECT * FROM (SELECT * FROM numbers(10)) INNER JOIN (SELECT * FROM testJoinTable) USING number;
+SELECT * FROM testJoinTable;
+
+DROP TABLE testJoinTable;
+
+SELECT '-';
+
+SET any_join_distinct_right_table_keys = 1;
+ 
+DROP TABLE IF EXISTS master;
+DROP TABLE IF EXISTS transaction;
+
+CREATE TABLE transaction (id Int32, value Float64, master_id Int32) ENGINE = MergeTree() ORDER BY id;
+CREATE TABLE master (id Int32, name String) ENGINE = Join (ANY, LEFT, id);
+
+INSERT INTO master VALUES (1, 'ONE');
+INSERT INTO transaction VALUES (1, 52.5, 1);
+
+SELECT tx.id, tx.value, m.name FROM transaction tx ANY LEFT JOIN master m ON m.id = tx.master_id;
+
+DROP TABLE master;
+DROP TABLE transaction;
+
+SELECT '-';
+
+DROP TABLE IF EXISTS some_join;
+DROP TABLE IF EXISTS tbl;
+
+CREATE TABLE tbl (eventDate Date, id String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY eventDate;
+CREATE TABLE some_join (id String, value String) ENGINE = Join(ANY, LEFT, id);
+
+SELECT * FROM tbl AS t ANY LEFT JOIN some_join USING (id);
+SELECT * FROM tbl AS t ANY LEFT JOIN some_join AS d USING (id);
+-- TODO SELECT t.*, d.* FROM tbl AS t ANY LEFT JOIN some_join AS d USING (id);
+
+DROP TABLE some_join;
+DROP TABLE tbl;
diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile
index 5978dcd08d0..fbb5396365a 100644
--- a/docker/builder/Dockerfile
+++ b/docker/builder/Dockerfile
@@ -12,11 +12,7 @@ RUN apt-get update -y \
             expect \
             g++-9 \
             gcc-9 \
-            libclang-6.0-dev \
-            libicu-dev \
-            liblld-6.0-dev \
             libreadline-dev \
-            gperf \
             ninja-build \
             perl \
             pkg-config \
diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile
index 4757a20b622..24037e8de81 100644
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@@ -18,7 +18,6 @@ RUN apt-get update \
             clickhouse-common-static=$version \
             clickhouse-client=$version \
             clickhouse-server=$version \
-            libgcc-7-dev \
             locales \
             tzdata \
             wget \
diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh
index bd702737274..6d586d1d77f 100755
--- a/docker/test/stateless_with_coverage/run.sh
+++ b/docker/test/stateless_with_coverage/run.sh
@@ -1,15 +1,16 @@
 #!/bin/bash
 
 kill_clickhouse () {
+    echo "clickhouse pids" `ps aux | grep clickhouse` | ts '%Y-%m-%d %H:%M:%S'
     kill `pgrep -u clickhouse` 2>/dev/null
 
     for i in {1..10}
     do
         if ! kill -0 `pgrep -u clickhouse`; then
-            echo "No clickhouse process"
+            echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
             break
         else
-            echo "Process" `pgrep -u clickhouse` "still alive"
+            echo "Process" `pgrep -u clickhouse` "still alive" | ts '%Y-%m-%d %H:%M:%S'
             sleep 10
         fi
     done
@@ -18,7 +19,7 @@ kill_clickhouse () {
 wait_llvm_profdata () {
     while kill -0 `pgrep llvm-profdata-9`;
     do
-        echo "Waiting for profdata" `pgrep llvm-profdata-9` "still alive"
+        echo "Waiting for profdata" `pgrep llvm-profdata-9` "still alive" | ts '%Y-%m-%d %H:%M:%S'
         sleep 3
     done
 }
diff --git a/docs/en/development/build.md b/docs/en/development/build.md
index 9bb9a7c6471..2df8e7eeb25 100644
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@@ -24,15 +24,9 @@ $ ./release
 
 The following tutorial is based on the Ubuntu Linux system.
 With appropriate changes, it should also work on any other Linux distribution.
-Only x86_64 with SSE 4.2 is supported. Support for AArch64 is experimental.
+Supported platforms: x86_64 and AArch64. Support for Power9 is experimental.
 
-To test for SSE 4.2, do
-
-```bash
-$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
-```
-
-## Install Git and CMake
+## Install Git, CMake and Ninja
 
 ```bash
 $ sudo apt-get install git cmake ninja-build
@@ -67,7 +61,7 @@ $ export CXX=g++-9
 ## Install Required Libraries from Packages
 
 ```bash
-$ sudo apt-get install libicu-dev libreadline-dev gperf
+$ sudo apt-get install libreadline-dev
 ```
 
 ## Checkout ClickHouse Sources
@@ -75,7 +69,7 @@ $ sudo apt-get install libicu-dev libreadline-dev gperf
 ```bash
 $ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
 ```
-or 
+or
 ```bash
 $ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 $ cd ClickHouse
diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md
index 6c1ee0bc953..23fe52ddb64 100644
--- a/docs/en/development/build_osx.md
+++ b/docs/en/development/build_osx.md
@@ -11,7 +11,7 @@ $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/inst
 ## Install Required Compilers, Tools, and Libraries
 
 ```bash
-$ brew install cmake ninja llvm icu4c openssl libtool gettext readline gperf
+$ brew install cmake ninja libtool gettext readline
 ```
 
 ## Checkout ClickHouse Sources
diff --git a/docs/en/development/developer_instruction.md b/docs/en/development/developer_instruction.md
index 9e9a6a8dbd1..40eb60fc5da 100644
--- a/docs/en/development/developer_instruction.md
+++ b/docs/en/development/developer_instruction.md
@@ -98,11 +98,11 @@ Next, check the version of CMake: `cmake --version`. If it is below 3.3, you sho
 
 ClickHouse uses several external libraries for building. Most of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
 
-There is a couple of libraries that are not built from sources but are supplied by the system: ICU and Readline, and thus are recommended to be installed.
+One library is not built from sources but is supplied by the system: Readline. It is therefore recommended to install it.
 
-Ubuntu: `sudo apt install libicu-dev libreadline-dev`
+Ubuntu: `sudo apt install libreadline-dev`
 
-Mac OS X: `brew install icu4c readline`
+Mac OS X: `brew install readline`
 
 However, these libraries are optional and ClickHouse can well be built without them. ICU is used for support of `COLLATE` in `ORDER BY` (i.e. for sorting in turkish alphabet). Readline is used for more convenient command input in clickhouse-client.
 
@@ -131,7 +131,7 @@ cd build
 ```
 You can have several different directories (build_release, build_debug, etc.) for different types of build.
 
-While inside the `build` directory, configure your build by running CMake. Before the first run you need to define environment variables that specify compiler (version 9 gcc compiler in this example). 
+While inside the `build` directory, configure your build by running CMake. Before the first run you need to define environment variables that specify compiler (version 9 gcc compiler in this example).
 
 Linux:
 ```
diff --git a/docs/ru/development/developer_instruction.md b/docs/ru/development/developer_instruction.md
index 5de79eade6c..61be36a7089 100644
--- a/docs/ru/development/developer_instruction.md
+++ b/docs/ru/development/developer_instruction.md
@@ -98,11 +98,11 @@ brew install cmake ninja
 
 ClickHouse использует для сборки некоторое количество внешних библиотек. Большинство из них не требуется отдельно устанавливать, так как они собираются вместе с ClickHouse, из исходников, которые расположены в submodules. Посмотреть набор этих библиотек можно в директории contrib.
 
-Пара библиотек не собирается из исходников, а используется из системы: ICU и Readline, и их рекомендуется установить.
+Одна библиотека не собирается из исходников, а используется из системы: Readline, и её рекомендуется установить.
 
-Ubuntu: `sudo apt install libicu-dev libreadline-dev`
+Ubuntu: `sudo apt install libreadline-dev`
 
-Mac OS X: `brew install icu4c readline`
+Mac OS X: `brew install readline`
 
 Впрочем, эти библиотеки не обязательны для работы и ClickHouse может быть собран без них. ICU используется для поддержки `COLLATE` в `ORDER BY` (например, для сортировки с учётом турецкого алфавита). Readline используется для более удобного набора команд в интерактивном режиме в clickhouse-client.
 
diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md
index ed8d64c0247..5fb0378f896 100644
--- a/docs/ru/extended_roadmap.md
+++ b/docs/ru/extended_roadmap.md
@@ -314,16 +314,15 @@ ClickHouse использует небольшое подмножество фу
 
 Добавление в submodules также нужно для Аркадии (7.26).
 
-### 7.2. LLVM в submodules.
+### 7.2. + LLVM в submodules.
 
-Уже добавлено, но старой версии, и поэтому не используется. Надо обновить.
-Георгий - очень опытный разработчик, либо будет делать Алексей Миловидов.
+Сделал Алексей Миловидов.
 
 ### 7.3. Обновление Poco.
 
 Алексанр Кузьменков.
 
-### 7.4. Включить libc++, libc++-abi при сборке с gcc.
+### 7.4. + Включить libc++, libc++-abi при сборке с gcc.
 
 Сейчас включено только при сборке с clang, но продакшен сборка использует gcc.
 Требует 7.2 и, возможно, 7.1 (только в случае новой версии ICU).
@@ -359,12 +358,11 @@ UBSan включен в функциональных тестах, но не в
 
 Пока есть просто показ тестового покрытия всего кода.
 
-### 7.13. Включение аналога -Weverything в gcc.
+### 7.13. + Включение аналога -Weverything в gcc.
 
 Мы используем -Wall -Wextra -Weverything -Werror.
 При сборке с clang, -Weverything уже включено. Но в gcc есть уникальные warning-и, отсутствующие в clang.
-Wolf Kreuzerkrieg. Возможно, его уже не интересует эта задача.
-Низкий приоритет. Возможно, будет отменено.
+Сделал Wolf Kreuzerkrieg.
 
 ### 7.14. Альтернатива для readline и libedit.
 
@@ -426,7 +424,6 @@ https://github.com/ClickHouse/ClickHouse/issues/8027#issuecomment-566670282
 Проверили на настоящем сервере Huawei, а также в специальном Docker контейнере, который содержит внутри qemu-user-static.
 Также можно проверить на Cavium, на Raspberry Pi а также на твоём Android телефоне.
 
-
 ### 7.20. Автосборка для FreeBSD x86_64.
 
 [Иван Лежанкин](https://github.com/abyss7).
diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md
index a1408fae987..3a3cdfd1b12 100644
--- a/docs/zh/development/build.md
+++ b/docs/zh/development/build.md
@@ -24,15 +24,9 @@ cd ClickHouse
 
 以下教程是在 Ubuntu Linux 中进行编译的示例。
 通过适当的更改，它应该可以适用于任何其他的 Linux 发行版。
-仅支持具有 SSE 4.2的 x86_64。 对 AArch64 的支持是实验性的。
+支持的平台：x86_64 和 AArch64。对 Power9 的支持是实验性的。
 
-测试是否支持 SSE 4.2，执行：
-
-```bash
-grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
-```
-
-## 安装 Git 和 CMake
+## 安装 Git、CMake 和 Ninja
 
 ```bash
 sudo apt-get install git cmake ninja-build
@@ -41,7 +35,7 @@ sudo apt-get install git cmake ninja-build
 Or cmake3 instead of cmake on older systems.
 或者在早期版本的系统中用 cmake3 替代 cmake
 
-## 安装 GCC 7
+## 安装 GCC 9
 
 There are several ways to do this.
 
@@ -51,24 +45,24 @@ There are several ways to do this.
 sudo apt-get install software-properties-common
 sudo apt-add-repository ppa:ubuntu-toolchain-r/test
 sudo apt-get update
-sudo apt-get install gcc-7 g++-7
+sudo apt-get install gcc-9 g++-9
 ```
 
 ### 源码安装 gcc
 
 请查看 [utils/ci/build-gcc-from-sources.sh](https://github.com/yandex/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
 
-## 使用 GCC 7 来编译
+## 使用 GCC 9 来编译
 
 ```bash
-export CC=gcc-7
-export CXX=g++-7
+export CC=gcc-9
+export CXX=g++-9
 ```
 
 ## 安装所需的工具依赖库
 
 ```bash
-sudo apt-get install libicu-dev libreadline-dev
+sudo apt-get install libreadline-dev
 ```
 
 ## 拉取 ClickHouse 源码
diff --git a/docs/zh/development/developer_instruction.md b/docs/zh/development/developer_instruction.md
index cbd9371402d..3f257d5a58e 100644
--- a/docs/zh/development/developer_instruction.md
+++ b/docs/zh/development/developer_instruction.md
@@ -105,11 +105,11 @@ brew install cmake ninja
 
 ClickHouse使用多个外部库进行构建。大多数外部库不需要单独安装，而是和ClickHouse一起在子模块中构建。可以查看`contrib`中罗列的清单。
 
-有一些库不是由源构建的，而是由系统提供，例如：ICU以及Readline，也建议安装。
+有一个库不是由源构建的，而是由系统提供：Readline，建议安装。
 
-Ubuntu: `sudo apt install libicu-dev libreadline-dev`
+Ubuntu: `sudo apt install libreadline-dev`
 
-Mac OS X: `brew install icu4c readline`
+Mac OS X: `brew install readline`
 
 但是，这些库本身都是可选的，ClickHouse即便没有它们也可以构建。ICU用于支持`ORDER BY`中的`COLLATE`(例如，对土耳其字母进行排序)。Readline用于在clickhouse-client中更便捷的指令输入。
 
diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index 3e58cba0164..a5be37026b1 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library (common
     src/argsToConfig.cpp
     src/Pipe.cpp
     src/phdr_cache.cpp
+    src/coverage.cpp
 
     include/common/SimpleCache.h
     include/common/Types.h
@@ -51,6 +52,7 @@ add_library (common
     include/common/sleep.h
     include/common/SimpleCache.h
     include/common/phdr_cache.h
+    include/common/coverage.h
 
     include/ext/bit_cast.h
     include/ext/chrono_io.h
diff --git a/libs/libcommon/cmake/find_gperftools.cmake b/libs/libcommon/cmake/find_gperftools.cmake
deleted file mode 100644
index 73e8182d390..00000000000
--- a/libs/libcommon/cmake/find_gperftools.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-if (OS_FREEBSD OR ARCH_32)
-    option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" OFF)
-else ()
-    option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" ${NOT_UNBUNDLED})
-endif ()
-
-option (ENABLE_TCMALLOC "Set to TRUE to enable tcmalloc" OFF)
-option (DEBUG_TCMALLOC "Set to TRUE to use debug version of libtcmalloc" OFF)
-
-if (ENABLE_TCMALLOC)
-    #contrib/libtcmalloc doesnt build debug version, try find in system
-    if (DEBUG_TCMALLOC OR NOT USE_INTERNAL_GPERFTOOLS_LIBRARY)
-        find_package (Gperftools)
-    endif ()
-
-    if (NOT (GPERFTOOLS_FOUND AND GPERFTOOLS_INCLUDE_DIR AND GPERFTOOLS_TCMALLOC_MINIMAL) AND NOT (OS_FREEBSD OR ARCH_32))
-        set (USE_INTERNAL_GPERFTOOLS_LIBRARY 1)
-        set (GPERFTOOLS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libtcmalloc/include")
-        set (GPERFTOOLS_TCMALLOC_MINIMAL tcmalloc_minimal_internal)
-    endif ()
-
-    if (GPERFTOOLS_FOUND OR USE_INTERNAL_GPERFTOOLS_LIBRARY)
-        set (USE_TCMALLOC 1)
-    endif ()
-
-    if (SANITIZE)
-        message (FATAL_ERROR "ENABLE_TCMALLOC is set to true, but it cannot be used with sanitizers")
-    endif ()
-
-    message (STATUS "Using tcmalloc=${USE_TCMALLOC}: ${GPERFTOOLS_INCLUDE_DIR} : ${GPERFTOOLS_TCMALLOC_MINIMAL}")
-endif ()
diff --git a/libs/libcommon/include/common/coverage.h b/libs/libcommon/include/common/coverage.h
new file mode 100644
index 00000000000..4a57528b0ce
--- /dev/null
+++ b/libs/libcommon/include/common/coverage.h
@@ -0,0 +1,9 @@
+#pragma once
+
+/// Flush the coverage report to file using the coverage system
+/// provided by the compiler (llvm for clang and gcov for gcc).
+///
+/// No-op if built without coverage (WITH_COVERAGE=0).
+/// Thread safe (takes an exclusive lock).
+/// Intended to be idempotent: may be called multiple times.
+void dumpCoverageReportIfPossible();
diff --git a/libs/libcommon/src/coverage.cpp b/libs/libcommon/src/coverage.cpp
new file mode 100644
index 00000000000..d8d3b71edd1
--- /dev/null
+++ b/libs/libcommon/src/coverage.cpp
@@ -0,0 +1,31 @@
+#include <common/coverage.h>
+#include <common/config_common.h>
+
+#if WITH_COVERAGE
+
+#include <unistd.h>
+#include <mutex>
+
+#if  defined(__clang__)
+extern "C" void __llvm_profile_dump(); // clang builds: llvm coverage dump hook
+#elif defined(__GNUC__) || defined(__GNUG__)
+extern "C" void __gcov_exit(); // gcc builds: gcov coverage dump hook
+#endif
+
+#endif // WITH_COVERAGE
+
+
+void dumpCoverageReportIfPossible() // Body is empty when WITH_COVERAGE is 0.
+{
+#if WITH_COVERAGE
+    static std::mutex mutex; // One process-wide lock: concurrent callers dump one at a time.
+    std::lock_guard lock(mutex);
+
+#if defined(__clang__)
+    __llvm_profile_dump();
+#elif defined(__GNUC__) || defined(__GNUG__)
+    __gcov_exit();
+#endif // compiler-specific dump call
+
+#endif // WITH_COVERAGE
+}
diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index 15b61c9b454..99dfb88f2dd 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -25,6 +25,7 @@
 #include <Poco/Observer.h>
 #include <Poco/AutoPtr.h>
 #include <common/getThreadNumber.h>
+#include <common/coverage.h>
 #include <Poco/PatternFormatter.h>
 #include <Poco/TaskManager.h>
 #include <Poco/File.h>
@@ -461,6 +462,7 @@ void BaseDaemon::terminate()
 
 void BaseDaemon::kill()
 {
+    dumpCoverageReportIfPossible();
     pid.clear();
     if (::raise(SIGKILL) != 0)
         throw Poco::SystemException("cannot kill process");
diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt
index 7a0d2c77355..8405c9450a5 100644
--- a/libs/libglibc-compatibility/CMakeLists.txt
+++ b/libs/libglibc-compatibility/CMakeLists.txt
@@ -38,6 +38,8 @@ if (GLIBC_COMPATIBILITY)
 
     if (COMPILER_CLANG)
         target_compile_options(glibc-compatibility PRIVATE -Wno-unused-command-line-argument)
+    elseif (COMPILER_GCC)
+        target_compile_options(glibc-compatibility PRIVATE -Wno-unused-but-set-variable)
     endif ()
 
     target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir})
diff --git a/libs/libglibc-compatibility/musl/log2f.c b/libs/libglibc-compatibility/musl/log2f.c
new file mode 100644
index 00000000000..c368f88f33f
--- /dev/null
+++ b/libs/libglibc-compatibility/musl/log2f.c
@@ -0,0 +1,72 @@
+/*
+ * Single-precision log2 function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "libm.h"
+#include "log2f_data.h"
+
+/*
+LOG2F_TABLE_BITS = 4
+LOG2F_POLY_ORDER = 4
+
+ULP error: 0.752 (nearest rounding.)
+Relative error: 1.9 * 2^-26 (before rounding.)
+*/
+
+#define N (1 << LOG2F_TABLE_BITS)
+#define T __log2f_data.tab
+#define A __log2f_data.poly
+#define OFF 0x3f330000
+
+float log2f(float x)
+{
+	double_t z, r, r2, p, y, y0, invc, logc;
+	uint32_t ix, iz, top, tmp;
+	int k, i;
+
+	ix = asuint(x); /* bit pattern of x; compared against float encodings below */
+	/* Fix sign of zero with downward rounding when x==1.  */
+	if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
+		return 0;
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+		/* x < 0x1p-126 or inf or nan.  */
+		if (ix * 2 == 0) /* +0 or -0: the sign bit is shifted out */
+			return __math_divzerof(1);
+		if (ix == 0x7f800000) /* log2(inf) == inf.  */
+			return x;
+		if ((ix & 0x80000000) || ix * 2 >= 0xff000000) /* sign bit set, or nan */
+			return __math_invalidf(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint(x * 0x1p23f);
+		ix -= 23 << 23; /* undo the 2^23 scaling in the exponent field */
+	}
+
+	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; /* subinterval index in [0, N) */
+	top = tmp & 0xff800000; /* keep only the bits above the 23-bit mantissa */
+	iz = ix - top; /* bit pattern of z */
+	k = (int32_t)tmp >> 23; /* arithmetic shift */
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = (double_t)asfloat(iz);
+
+	/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+	r = z * invc - 1;
+	y0 = logc + (double_t)k;
+
+	/* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+	r2 = r * r;
+	y = A[1] * r + A[2];
+	y = A[0] * r2 + y;
+	p = A[3] * r + y0;
+	y = y * r2 + p; /* = A[0]*r^4 + A[1]*r^3 + A[2]*r^2 + A[3]*r + y0 */
+	return eval_as_float(y);
+}
diff --git a/libs/libglibc-compatibility/musl/log2f_data.c b/libs/libglibc-compatibility/musl/log2f_data.c
new file mode 100644
index 00000000000..24e450f1ec3
--- /dev/null
+++ b/libs/libglibc-compatibility/musl/log2f_data.c
@@ -0,0 +1,33 @@
+/*
+ * Data definition for log2f.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "log2f_data.h"
+
+const struct log2f_data __log2f_data = {
+  .tab = { /* { invc, logc } pairs, 1 << LOG2F_TABLE_BITS entries */
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+  { 0x1p+0, 0x0p+0 }, /* invc = 1, logc = 0 */
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
+  },
+  .poly = { /* LOG2F_POLY_ORDER coefficients, read as A[0]..A[3] by log2f() */
+  -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
+  0x1.715475f35c8b8p0,
+  }
+};
diff --git a/libs/libglibc-compatibility/musl/log2f_data.h b/libs/libglibc-compatibility/musl/log2f_data.h
new file mode 100644
index 00000000000..91d781c10fe
--- /dev/null
+++ b/libs/libglibc-compatibility/musl/log2f_data.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _LOG2F_DATA_H
+#define _LOG2F_DATA_H
+
+#include "musl_features.h"
+
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern hidden const struct log2f_data {
+	struct {
+		double invc, logc; /* log2f() uses invc as 1/c and logc as log2(c) */
+	} tab[1 << LOG2F_TABLE_BITS]; /* indexed by the subinterval index computed in log2f() */
+	double poly[LOG2F_POLY_ORDER]; /* coefficients of the log1p(r)/ln2 approximation */
+} __log2f_data;
+
+#endif
diff --git a/utils/grammar/ClickHouseParser.g4 b/utils/grammar/ClickHouseParser.g4
index fa00d29d704..5cb4676fcb8 100644
--- a/utils/grammar/ClickHouseParser.g4
+++ b/utils/grammar/ClickHouseParser.g4
@@ -12,7 +12,7 @@ options {
 // 4. правило для expr переписано чтобы понизить глубину AST и сразу выходить на уровень expr - al
 
 parse
- : ( query | error ) EOF
+ : ( query | err ) EOF
  ;
 
 query
@@ -41,7 +41,7 @@ select_query
  ;
 
 select_query_main
- :  select_with_step
+ :  select_with_step?
     select_select_step select_from_step?
     K_FINAL? select_sample_step?
     select_array_join_step? select_join_step?
@@ -575,7 +575,7 @@ literal
  |    STRING_LITERAL
  ;
 
-error
+err
  : UNEXPECTED_CHAR
    {
      throw new RuntimeException("UNEXPECTED_CHAR=" + $UNEXPECTED_CHAR.text);