Initial working example of Azure Blob Storage manipulation from within ClickHouse

This commit is contained in:
Jakub Kuklis 2021-09-27 09:31:27 +00:00 committed by Jakub Kuklis
parent 5a542516e6
commit 162e2b334b
11 changed files with 251 additions and 0 deletions

View File

@ -508,6 +508,7 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/poco.cmake)
include (cmake/find/curl.cmake)
include (cmake/find/s3.cmake)
include (cmake/find/blob_storage.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/simdjson.cmake)

View File

@ -0,0 +1,9 @@
# Azure Blob Storage support is currently provided only by the bundled Azure SDK
# (target `azure_sdk`); using a system-provided SDK is not implemented.
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
    "Set to FALSE to use system Azure Blob Storage SDK instead of bundled (OFF currently not implemented)"
    ON)

# Outputs:
#   USE_AZURE_BLOB_STORAGE     - 1 when Azure Blob Storage support is enabled, 0 otherwise
#   AZURE_BLOB_STORAGE_LIBRARY - library/target name to link against (set only when enabled)
if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
    set(USE_AZURE_BLOB_STORAGE 1)
    set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
else()
    # Give the variable an explicit value so the status line below (and any
    # later consumer) sees 0 rather than an empty string.
    set(USE_AZURE_BLOB_STORAGE 0)
endif()

message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")

View File

@ -249,6 +249,10 @@ endif()
# - sentry-native
add_subdirectory (curl-cmake)
# Add the azure-cmake subdirectory (bundled Azure SDK build) only when the
# internal Azure Blob Storage library is requested.
# NOTE(review): presumably this must stay after curl-cmake because the SDK
# build links against a curl target - confirm before reordering.
if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
add_subdirectory(azure-cmake)
endif()
if (USE_SENTRY)
add_subdirectory (sentry-native-cmake)
endif()

1
contrib/azure vendored Submodule

@ -0,0 +1 @@
Subproject commit 23e738b7195c0895f67a6d82b0f111b7c68b8b7a

View File

@ -0,0 +1,42 @@
## Copyright (c) Microsoft Corporation. All rights reserved.
## SPDX-License-Identifier: MIT
# -----------------------------------------------------------------------------
# HTTP transport adapter selection
#
# Optional inputs: BUILD_TRANSPORT_CUSTOM, BUILD_TRANSPORT_CURL,
#                  BUILD_TRANSPORT_WINHTTP, AZ_ALL_LIBRARIES.
#
# When nothing is requested, WinHTTP is chosen on Windows-like hosts and
# libcurl on POSIX hosts.
#   * Windows-like hosts: libcurl and WinHTTP may both be enabled.
#   * POSIX hosts: only libcurl is accepted; asking for WinHTTP aborts the
#     configure step.
#
# The choice is exposed to the sources through the compile definitions
# BUILD_TRANSPORT_CUSTOM_ADAPTER, BUILD_CURL_HTTP_TRANSPORT_ADAPTER and
# BUILD_TRANSPORT_WINHTTP_ADAPTER.
# -----------------------------------------------------------------------------

# A user-supplied transport is independent of the platform logic below.
if (BUILD_TRANSPORT_CUSTOM)
    message("Using the user-defined transport adapter. Make sure `AzureSdkGetCustomHttpTransport` is implemented and linked.")
    add_compile_definitions(BUILD_TRANSPORT_CUSTOM_ADAPTER)
endif()

if (WIN32 OR MINGW OR MSYS OR CYGWIN)
    # libcurl is opt-in on Windows-like hosts.
    if (BUILD_TRANSPORT_CURL)
        add_compile_definitions(BUILD_CURL_HTTP_TRANSPORT_ADAPTER)
    endif()

    # WinHTTP is used when requested explicitly, or when neither libcurl nor a
    # custom transport has been selected.
    if (BUILD_TRANSPORT_WINHTTP OR NOT (BUILD_TRANSPORT_CURL OR BUILD_TRANSPORT_CUSTOM))
        message("By default, if no option is selected, on Windows, WinHTTP transport adapter is used.")
        add_compile_definitions(BUILD_TRANSPORT_WINHTTP_ADAPTER)

        # Record the effective choice when AZ_ALL_LIBRARIES is set.
        if (AZ_ALL_LIBRARIES)
            set(BUILD_TRANSPORT_WINHTTP ON)
        endif()
    endif()
elseif (UNIX)
    # WinHTTP does not exist on POSIX; fail early instead of at compile time.
    if (BUILD_TRANSPORT_WINHTTP)
        message(FATAL_ERROR "WinHTTP transport adapter is not supported for POSIX platforms.")
    endif()

    # libcurl is used when requested explicitly, or whenever no custom
    # transport has been selected.
    if (BUILD_TRANSPORT_CURL OR (NOT BUILD_TRANSPORT_CUSTOM))
        message("By default, if no option is selected, on POSIX, libcurl transport adapter is used.")
        add_compile_definitions(BUILD_CURL_HTTP_TRANSPORT_ADAPTER)

        # Record the effective choice when AZ_ALL_LIBRARIES is set.
        if (AZ_ALL_LIBRARIES)
            set(BUILD_TRANSPORT_CURL ON)
        endif()
    endif()
else()
    message(FATAL_ERROR "Unsupported platform.")
endif()

View File

@ -0,0 +1,74 @@
# Builds the parts of the bundled Azure SDK for C++ that are needed for Blob
# Storage (core, identity, storage-common, storage-blobs) as one library
# target: `azure_sdk`.

set(AZURE_SDK_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk")

# --- Sources, collected per SDK component ------------------------------------

file(GLOB AZURE_SDK_CORE_SRC
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp"
)

file(GLOB AZURE_SDK_IDENTITY_SRC
    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp"
)

file(GLOB AZURE_SDK_STORAGE_COMMON_SRC
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp"
)

file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp"
)

# The per-component lists above already hold concrete file paths, so they are
# simply concatenated. Feeding them through file(GLOB) again would re-interpret
# every path as a glob pattern for no benefit.
set(AZURE_SDK_UNIFIED_SRC
    ${AZURE_SDK_CORE_SRC}
    ${AZURE_SDK_IDENTITY_SRC}
    ${AZURE_SDK_STORAGE_COMMON_SRC}
    ${AZURE_SDK_STORAGE_BLOBS_SRC}
)

# --- Public include directories ----------------------------------------------

set(AZURE_SDK_INCLUDES
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/inc/"
    "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/inc/"
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/inc/"
    "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/inc/"
)

# Selects the HTTP transport and adds the matching compile definitions. It must
# run before add_library() so the directory-scoped definitions reach azure_sdk.
include(AzureTransportAdapters.cmake)

add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC})

# --- Link dependencies -------------------------------------------------------

# Upstream links bcrypt/crypt32 on Windows and OpenSSL::SSL (via
# find_package(OpenSSL REQUIRED)) elsewhere. Here the OpenSSL libraries that
# were already discovered by the enclosing build are reused, when available.
if (OPENSSL_FOUND)
    target_link_libraries(azure_sdk PRIVATE ${OPENSSL_LIBRARIES})
endif()

# Upstream links CURL::libcurl only when BUILD_TRANSPORT_CURL is set and
# winhttp only when BUILD_TRANSPORT_WINHTTP is set. Here libcurl is always
# linked, together with libxml2.
target_link_libraries(azure_sdk
    PRIVATE
        CURL::libcurl
        ${LIBXML2_LIBRARIES}
)

# Both the libxml2 headers and the SDK's own headers are exported to consumers.
target_include_directories(azure_sdk
    PUBLIC
        ${LIBXML2_INCLUDE_DIRS}
        ${AZURE_SDK_INCLUDES}
)

View File

@ -639,6 +639,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
"${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
"${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
)
add_executable(

View File

@ -106,6 +106,10 @@ if (USE_AWS_S3)
add_headers_and_sources(dbms Disks/S3)
endif()
# Register the Disks/BlobStorage headers and sources with the `dbms` source
# lists only when Azure Blob Storage support is enabled.
if (USE_AZURE_BLOB_STORAGE)
add_headers_and_sources(dbms Disks/BlobStorage)
endif()
if (USE_HDFS)
add_headers_and_sources(dbms Storages/HDFS)
add_headers_and_sources(dbms Disks/HDFS)
@ -450,6 +454,11 @@ if (USE_AWS_S3)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
endif()
# Link clickhouse_common_io against the Azure Blob Storage library and expose
# the SDK headers as SYSTEM includes, placed BEFORE other include directories.
# NOTE(review): AZURE_SDK_INCLUDES appears to be set inside the azure-cmake
# subdirectory; variables set in an add_subdirectory() scope are not visible
# here, so this expansion is presumably empty unless the variable is also
# defined at top level - confirm.
if (USE_AZURE_BLOB_STORAGE)
target_link_libraries (clickhouse_common_io PUBLIC ${AZURE_BLOB_STORAGE_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AZURE_SDK_INCLUDES})
endif()
if (USE_S2_GEOMETRY)
dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR})

View File

@ -0,0 +1,85 @@
// #if USE_AZURE_BLOB_STORAGE
#include <iostream>
#include <azure/storage/blobs.hpp>
#include <azure/identity/managed_identity_credential.hpp>
namespace DB
{

/// Smoke test / usage example for the Azure Blob Storage SDK.
///
/// Using a managed identity credential it:
///   1. lists up to `maxListedBlobs` blobs of a hard-coded container,
///   2. prints the creation time of a hard-coded blob,
///   3. overwrites that blob with a few sample bytes and prints its new
///      modification time and size.
///
/// All output goes to std::cout. Errors are not handled here; whatever the
/// SDK calls throw propagates to the caller.
void blob_do_sth()
{
    // not to repeat it for every storage function
    using namespace Azure::Storage::Blobs;

    // Endpoint URLs follow the format
    // "https://<storage-account>.blob.core.windows.net/<container>/<blob>",
    // with the container and blob parts being optional.
    constexpr auto containerUrl = "https://sadttmpstgeus.blob.core.windows.net/data";
    constexpr auto blobUrl = "https://sadttmpstgeus.blob.core.windows.net/data/hello";

    // upper bound on the number of blobs printed from the listing
    constexpr size_t maxListedBlobs = 20;

    // obtain the managed identity, it should be available in the VM
    // quote from sdk/identity/azure-identity/samples/managed_identity_credential.cpp:
    // "Managed Identity Credential would be available in some environments such as on Azure VMs."
    auto managedIdentityCredential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();

    // create a client accessing the container
    auto blobContainerClient = BlobContainerClient(containerUrl, managedIdentityCredential);

    // list blobs in the container
    auto listBlobs = blobContainerClient.ListBlobs();

    // print information about the container
    std::cout << "Storage account: " << listBlobs.ServiceEndpoint
              << ", container: " << listBlobs.BlobContainerName << "\n";

    // print information about the blobs in the container
    std::cout << "Blobs (max " << maxListedBlobs << "):\n";
    for (size_t i = 0; i < maxListedBlobs && i < listBlobs.Blobs.size(); ++i)
    {
        const auto & blob = listBlobs.Blobs[i];
        std::cout << blob.Name << " " << blob.BlobSize << " " << blob.BlobType.ToString() << "\n";
    }
    std::cout << "\n";

    // create a client accessing the blob
    auto blobClient = BlobClient(blobUrl, managedIdentityCredential);

    // obtain properties of the blob
    auto blobProperties = blobClient.GetProperties();

    // print the creation date for the blob
    std::cout << blobProperties.Value.CreatedOn.ToString() << "\n";

    // create a client to manipulate the blob
    auto blockBlobClient = BlockBlobClient(blobUrl, managedIdentityCredential);

    // data to be put in the blob
    const uint8_t dataSample[] = {1, 2, 3};

    // overwrite the blob with the data above; the size is derived from the
    // array so the two cannot drift apart
    blockBlobClient.UploadFrom(dataSample, sizeof(dataSample));

    // get the list of blocks within the blob
    auto blobList = blockBlobClient.GetBlockList();

    // should print a recent time and the size of dataSample
    std::cout << "Last modified date of uploaded blob: " << blobList.Value.LastModified.ToString()
              << ", size: " << blobList.Value.BlobSize << "\n";

    // Example (disabled): overwrite the blob with the contents of the local file "file.txt"
    // blockBlobClient.UploadFrom("file.txt");
    // // once again, get the list of blocks within the blob
    // blobList = blockBlobClient.GetBlockList();
    // // should print a recent time and the size of file.txt
    // std::cout << "Last modified date of uploaded blob: " << blobList.Value.LastModified.ToString()
    //           << ", size: " << blobList.Value.BlobSize << "\n";
}

}
// #endif

View File

@ -0,0 +1,12 @@
#pragma once
// #if USE_AZURE_BLOB_STORAGE
namespace DB
{
/// Exercises the Azure Blob Storage SDK against a hard-coded storage account:
/// lists blobs in a container, reads a blob's properties and overwrites that
/// blob with sample data, printing the results to standard output.
/// Takes no arguments and returns nothing.
/// NOTE(review): presumably requires an environment where an Azure managed
/// identity is available - confirm before calling from automated tests.
void blob_do_sth();
}
// #endif

View File

@ -0,0 +1,13 @@
#include <gtest/gtest.h>
#include <Disks/BlobStorage/Blob.h>
// #if USE_AZURE_BLOB_STORAGE
using namespace DB;
// Runs DB::blob_do_sth() once; the test body contains no assertions, so it
// only fails if the call throws or crashes.
// NOTE(review): blob_do_sth() appears to talk to a live Azure storage account,
// so this test presumably needs network access and Azure credentials - confirm.
TEST(DiskBlobStorage, doAll)
{
blob_do_sth();
}
// #endif