Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-24 16:42:05 +00:00)
Add integration tests for hive query
commit 9902ccefc5 (parent f33ec0fd47)
docker/test/integration/hive_server/Dockerfile | 47 (new file)
@@ -0,0 +1,47 @@
FROM ubuntu:20.04
MAINTAINER lgbo-ustc <lgbo.ustc@gmail.com>

RUN apt-get update
RUN apt-get install -y wget openjdk-8-jre

RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
    tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
    tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
RUN apt install -y vim

RUN apt install -y openssh-server openssh-client

RUN apt install -y mysql-server

RUN mkdir -p /root/.ssh && \
    ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \
    cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \
    cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \
    cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub

RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz && \
    tar -xf mysql-connector-java-8.0.27.tar.gz && \
    mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
    rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27

RUN apt install -y iputils-ping net-tools

ENV JAVA_HOME=/usr
ENV HADOOP_HOME=/hadoop-3.1.0
ENV HDFS_NAMENODE_USER=root
ENV HDFS_DATANODE_USER=root HDFS_SECONDARYNAMENODE_USER=root YARN_RESOURCEMANAGER_USER=root YARN_NODEMANAGER_USER=root HDFS_DATANODE_SECURE_USER=hdfs
COPY hdfs-site.xml /hadoop-3.1.0/etc/hadoop
COPY mapred-site.xml /hadoop-3.1.0/etc/hadoop
COPY yarn-site.xml /hadoop-3.1.0/etc/hadoop
COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/
#COPY core-site.xml /hadoop-3.1.0/etc/hadoop
COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop
COPY hive-site.xml /apache-hive-2.3.9-bin/conf
COPY prepare_hive_data.sh /
COPY demo_data.txt /

ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH

COPY start.sh /
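For local debugging it can help to build and start this image by hand before wiring it into the integration-test harness. A minimal sketch, assuming the image is tagged to match the compose file further down (lgboustc/hive_test:v1.0) and that start.sh is used as the entrypoint:

# build the Hive/Hadoop server image from this directory (tag chosen to match docker_compose_hive.yml)
docker build -t lgboustc/hive_test:v1.0 docker/test/integration/hive_server/

# run it with the hostname the Hive engine tables below expect ('hivetest'),
# letting start.sh format HDFS, start the services and load the demo data
docker run -d --name hivetest --hostname hivetest lgboustc/hive_test:v1.0 bash /start.sh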
docker/test/integration/hive_server/core-site.xml.template | 14 (new file)
@@ -0,0 +1,14 @@
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://HOSTNAME:9000</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>
docker/test/integration/hive_server/demo_data.txt | 6 (new file)
@@ -0,0 +1,6 @@
abc,1,2021-11-16
abd,15,2021-11-05
aaa,22,2021-11-16
dda,0,2021-11-01
dfb,11,2021-11-05
jhn,89,2021-11-11
docker/test/integration/hive_server/hadoop-env.sh | 422 (new file)
@@ -0,0 +1,422 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

##
## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS.  THEREFORE,
## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
##
## Precedence rules:
##
## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
##
## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
##

# Many of the options here are built from the perspective that users
# may want to provide OVERWRITING values on the command line.
# For example:
#
JAVA_HOME=/usr/
#
# Therefore, the vast majority (BUT NOT ALL!) of these defaults
# are configured for substitution and not append.  If append
# is preferable, modify this file accordingly.

###
# Generic settings for HADOOP
###

# Technically, the only required environment variable is JAVA_HOME.
# All others are optional.  However, the defaults are probably not
# preferred.  Many sites configure these options outside of Hadoop,
# such as in /etc/profile.d

# The java implementation to use. By default, this environment
# variable is REQUIRED on ALL platforms except OS X!
# export JAVA_HOME=

# Location of Hadoop.  By default, Hadoop will attempt to determine
# this location based upon its execution path.
# export HADOOP_HOME=

# Location of Hadoop's configuration information.  i.e., where this
# file is living. If this is not defined, Hadoop will attempt to
# locate it based upon its execution path.
#
# NOTE: It is recommended that this variable not be set here but in
# /etc/profile.d or equivalent.  Some options (such as
# --config) may react strangely otherwise.
#
# export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop

# The maximum amount of heap to use (Java -Xmx).  If no unit
# is provided, it will be converted to MB.  Daemons will
# prefer any Xmx setting in their respective _OPT variable.
# There is no default; the JVM will autoscale based upon machine
# memory size.
# export HADOOP_HEAPSIZE_MAX=

# The minimum amount of heap to use (Java -Xms).  If no unit
# is provided, it will be converted to MB.  Daemons will
# prefer any Xms setting in their respective _OPT variable.
# There is no default; the JVM will autoscale based upon machine
# memory size.
# export HADOOP_HEAPSIZE_MIN=

# Enable extra debugging of Hadoop's JAAS binding, used to set up
# Kerberos security.
# export HADOOP_JAAS_DEBUG=true

# Extra Java runtime options for all Hadoop commands. We don't support
# IPv6 yet/still, so by default the preference is set to IPv4.
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
# For Kerberos debugging, an extended option set logs more information
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug"

# Some parts of the shell code may do special things dependent upon
# the operating system.  We have to set this here. See the next
# section as to why....
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}


# Under certain conditions, Java on OS X will throw SCDynamicStore errors
# in the system logs.
# See HADOOP-8719 for more information.  If one needs Kerberos
# support on OS X, one will want to change/remove this extra bit.
case ${HADOOP_OS_TYPE} in
  Darwin*)
    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
  ;;
esac

# Extra Java runtime options for some Hadoop commands
# and clients (i.e., hdfs dfs -blah).  These get appended to HADOOP_OPTS for
# such commands.  In most cases, # this should be left empty and
# let users supply it on the command line.
# export HADOOP_CLIENT_OPTS=""

#
# A note about classpaths.
#
# By default, Apache Hadoop overrides Java's CLASSPATH
# environment variable.  It is configured such
# that it starts out blank with new entries added after passing
# a series of checks (file/dir exists, not already listed aka
# de-duplication).  During de-duplication, wildcards and/or
# directories are *NOT* expanded to keep it simple. Therefore,
# if the computed classpath has two specific mentions of
# awesome-methods-1.0.jar, only the first one added will be seen.
# If two directories are in the classpath that both contain
# awesome-methods-1.0.jar, then Java will pick up both versions.

# An additional, custom CLASSPATH. Site-wide configs should be
# handled via the shellprofile functionality, utilizing the
# hadoop_add_classpath function for greater control and much
# harder for apps/end-users to accidentally override.
# Similarly, end users should utilize ${HOME}/.hadooprc .
# This variable should ideally only be used as a short-cut,
# interactive way for temporary additions on the command line.
# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine"

# Should HADOOP_CLASSPATH be first in the official CLASSPATH?
# export HADOOP_USER_CLASSPATH_FIRST="yes"

# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along
# with the main jar are handled by a separate isolated
# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job'
# is utilized. If it is set, HADOOP_CLASSPATH and
# HADOOP_USER_CLASSPATH_FIRST are ignored.
# export HADOOP_USE_CLIENT_CLASSLOADER=true

# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of
# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER
# is enabled. Names ending in '.' (period) are treated as package names, and
# names starting with a '-' are treated as negative matches. For example,
# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop."

# Enable optional, bundled Hadoop features
# This is a comma delimited list.  It may NOT be overridden via .hadooprc
# Entries may be added/removed as needed.
# export HADOOP_OPTIONAL_TOOLS="hadoop-openstack,hadoop-aliyun,hadoop-azure,hadoop-azure-datalake,hadoop-aws,hadoop-kafka"

###
# Options for remote shell connectivity
###

# There are some optional components of hadoop that allow for
# command and control of remote hosts.  For example,
# start-dfs.sh will attempt to bring up all NNs, DNS, etc.

# Options to pass to SSH when one of the "log into a host and
# start/stop daemons" scripts is executed
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"

# The built-in ssh handler will limit itself to 10 simultaneous connections.
# For pdsh users, this sets the fanout size ( -f )
# Change this to increase/decrease as necessary.
# export HADOOP_SSH_PARALLEL=10

# Filename which contains all of the hosts for any remote execution
# helper scripts # such as workers.sh, start-dfs.sh, etc.
# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"

###
# Options for all daemons
###
#

#
# Many options may also be specified as Java properties.  It is
# very common, and in many cases, desirable, to hard-set these
# in daemon _OPTS variables.  Where applicable, the appropriate
# Java property is also identified.  Note that many are re-used
# or set differently in certain contexts (e.g., secure vs
# non-secure)
#

# Where (primarily) daemon log files are stored.
# ${HADOOP_HOME}/logs by default.
# Java property: hadoop.log.dir
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs

# A string representing this instance of hadoop. $USER by default.
# This is used in writing log and pid files, so keep that in mind!
# Java property: hadoop.id.str
# export HADOOP_IDENT_STRING=$USER

# How many seconds to pause after stopping a daemon
# export HADOOP_STOP_TIMEOUT=5

# Where pid files are stored.  /tmp by default.
# export HADOOP_PID_DIR=/tmp

# Default log4j setting for interactive commands
# Java property: hadoop.root.logger
# export HADOOP_ROOT_LOGGER=INFO,console

# Default log4j setting for daemons spawned explicitly by
# --daemon option of hadoop, hdfs, mapred and yarn command.
# Java property: hadoop.root.logger
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA

# Default log level and output location for security-related messages.
# You will almost certainly want to change this on a per-daemon basis via
# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the
# defaults for the NN and 2NN override this by default.)
# Java property: hadoop.security.logger
# export HADOOP_SECURITY_LOGGER=INFO,NullAppender

# Default process priority level
# Note that sub-processes will also run at this level!
# export HADOOP_NICENESS=0

# Default name for the service level authorization file
# Java property: hadoop.policy.file
# export HADOOP_POLICYFILE="hadoop-policy.xml"

#
# NOTE: this is not used by default!  <-----
# You can define variables right here and then re-use them later on.
# For example, it is common to use the same garbage collection settings
# for all the daemons.  So one could define:
#
# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
#
# .. and then use it as per the b option under the namenode.

###
# Secure/privileged execution
###

#
# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
# on privileged ports.  This functionality can be replaced by providing
# custom functions.  See hadoop-functions.sh for more information.
#

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol.  Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
# export JSVC_HOME=/usr/bin

#
# This directory contains pids for secure and privileged processes.
#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}

#
# This directory contains the logs for secure and privileged processes.
# Java property: hadoop.log.dir
# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}

#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus.  Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER.  If one wants
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"

###
# NameNode specific parameters
###

# Default log level and output location for file system related change
# messages. For non-namenode daemons, the Java property must be set in
# the appropriate _OPTS if one wants something other than INFO,NullAppender
# Java property: hdfs.audit.logger
# export HDFS_AUDIT_LOGGER=INFO,NullAppender

# Specify the JVM options to be used when starting the NameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# a) Set JMX options
# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# b) Set garbage collection logs
# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# c) ... or set them directly
# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"

# this is the default:
# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"

###
# SecondaryNameNode specific parameters
###
# Specify the JVM options to be used when starting the SecondaryNameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"

###
# DataNode specific parameters
###
# Specify the JVM options to be used when starting the DataNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_DATANODE_SECURE_USER=hdfs

# Supplemental options for secure datanodes
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"

###
# NFS3 Gateway specific parameters
###
# Specify the JVM options to be used when starting the NFS3 Gateway.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_NFS3_OPTS=""

# Specify the JVM options to be used when starting the Hadoop portmapper.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_PORTMAP_OPTS="-Xmx512m"

# Supplemental options for privileged gateways
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server"

# On privileged gateways, user to run the gateway as after dropping privileges
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_NFS3_SECURE_USER=nfsserver

###
# ZKFailoverController specific parameters
###
# Specify the JVM options to be used when starting the ZKFailoverController.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_ZKFC_OPTS=""

###
# QuorumJournalNode specific parameters
###
# Specify the JVM options to be used when starting the QuorumJournalNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_JOURNALNODE_OPTS=""

###
# HDFS Balancer specific parameters
###
# Specify the JVM options to be used when starting the HDFS Balancer.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_BALANCER_OPTS=""

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_MOVER_OPTS=""

###
# Router-based HDFS Federation specific parameters
# Specify the JVM options to be used when starting the RBF Routers.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_DFSROUTER_OPTS=""
###

###
# Advanced Users Only!
###

#
# When building Hadoop, one can add the class paths to the commands
# via this special env var:
# export HADOOP_ENABLE_BUILD_PATHS="true"

#
# To prevent accidents, shell commands can be (superficially) locked
# to only allow certain users to execute certain subcommands.
# It uses the format of (command)_(subcommand)_USER.
#
# For example, to limit who can execute the namenode command,
# export HDFS_NAMENODE_USER=hdfs
docker/test/integration/hive_server/hdfs-site.xml | 6 (new file)
@@ -0,0 +1,6 @@
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
docker/test/integration/hive_server/hive-site.xml | 35 (new file)
@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost/hcatalog?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>test</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>test</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
</configuration>
docker/test/integration/hive_server/mapred-site.xml | 6 (new file)
@@ -0,0 +1,6 @@
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
docker/test/integration/hive_server/prepare_hive_data.sh | 6 (new executable file)
@@ -0,0 +1,6 @@
#!/bin/bash
hive -e "create database test"

hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; "
hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text "
hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text"
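prepare_hive_data.sh creates a test database with a Parquet, an ORC and a text table, loads demo_data.txt into the text table, then re-inserts it into the other two with dynamic partitioning on day. A quick, hedged way to check the result from inside the container (how you reach the container depends on how it was started):

# list the partitions created by the dynamic-partition inserts
hive -e "show partitions test.demo"
hive -e "show partitions test.demo_orc"

# the row count should match the six rows of /demo_data.txt
hive -e "select count(*) from test.demo"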
docker/test/integration/hive_server/start.sh | 12 (new executable file)
@@ -0,0 +1,12 @@
service ssh start
sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml
hadoop namenode -format
start-all.sh
service mysql start
mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\""
mysql -u root -e "GRANT ALL ON * . * TO 'test'@'localhost'"
schematool -initSchema -dbType mysql
#nohup hiveserver2 &
nohup hive --service metastore &
bash /prepare_hive_data.sh
while true; do sleep 1000; done
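start.sh substitutes the container hostname into core-site.xml, formats the namenode, brings up HDFS and YARN, initializes the MySQL-backed metastore schema and then runs the Hive metastore before loading the demo data. If the container misbehaves, a rough troubleshooting sketch (assuming it was started with the name hivetest, as in the example above):

docker exec -it hivetest bash -c "hdfs dfsadmin -report"        # is the datanode registered?
docker exec -it hivetest bash -c "hive -e 'show databases'"     # is the metastore reachable?
docker exec -it hivetest bash -c "netstat -ntlp | grep 9083"    # is the thrift metastore port open?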
docker/test/integration/hive_server/yarn-site.xml | 32 (new file)
@@ -0,0 +1,32 @@
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.application.classpath</name>
        <value>/hadoop-3.1.0/etc/hadoop,/hadoop-3.1.0/share/hadoop/common/*,/hadoop-3.1.0/share/hadoop/common/lib/*,/hadoop-3.1.0/share/hadoop/hdfs/*, /hadoop-3.1.0/share/hadoop/hdfs/lib/*, /hadoop-3.1.0/share/hadoop/mapreduce/*, /hadoop-3.1.0/share/hadoop/mapreduce/lib/*, /hadoop-3.1.0/share/hadoop/yarn/*, /hadoop-3.1.0/share/hadoop/yarn/lib/*</value>
    </property>

    <property>
        <description>
        Number of seconds after an application finishes before the nodemanager's
        DeletionService will delete the application's localized file directory
        and log directory.

        To diagnose Yarn application problems, set this property's value large
        enough (for example, to 600 = 10 minutes) to permit examination of these
        directories. After changing the property's value, you must restart the
        nodemanager in order for it to have an effect.

        The roots of Yarn applications' work directories is configurable with
        the yarn.nodemanager.local-dirs property (see below), and the roots
        of the Yarn applications' log directories is configurable with the
        yarn.nodemanager.log-dirs property (see also below).
        </description>
        <name>yarn.nodemanager.delete.debug-delay-sec</name>
        <value>600</value>
    </property>

</configuration>
@@ -0,0 +1,7 @@
version: '2.3'
services:
    hdfs1:
        image: lgboustc/hive_test:v1.0
        hostname: hivetest
        restart: always
        entrypoint: bash /start.sh
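This compose file (its path is not shown in this view) is what the new setup_hive() helper below hands to docker-compose as docker_compose_hive.yml. To bring the service up outside the test harness, something along these lines should work, run from whichever directory holds the file:

# start only the Hive/Hadoop container defined above
docker-compose -f docker_compose_hive.yml up -d hdfs1

# follow its logs until the metastore is up and the demo data load has finished
docker-compose -f docker_compose_hive.yml logs -f hdfs1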
@@ -750,17 +750,24 @@ class ClickHouseCluster:
                          '--file', p.join(docker_compose_yml_dir, 'docker_compose_nginx.yml')]
         return self.base_nginx_cmd
 
+    def setup_hive(self, instance, env_variables, docker_compose_yml_dir):
+        self.with_hive = True
+        self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hive.yml')])
+        self.base_hive_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name,
+                              '--file', p.join(docker_compose_yml_dir, 'docker_compose_hive.yml')]
+        return self.base_hive_cmd
+
     def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None,
                      macros=None, with_zookeeper=False, with_zookeeper_secure=False,
                      with_mysql_client=False, with_mysql=False, with_mysql8=False, with_mysql_cluster=False,
                      with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None,
                      with_odbc_drivers=False, with_postgres=False, with_postgres_cluster=False, with_hdfs=False,
                      with_kerberized_hdfs=False, with_mongo=False, with_mongo_secure=False, with_nginx=False,
-                     with_redis=False, with_minio=False, with_cassandra=False, with_jdbc_bridge=False,
+                     with_redis=False, with_minio=False, with_cassandra=False, with_jdbc_bridge=False, with_hive=False,
                      hostname=None, env_variables=None, image="clickhouse/integration-test", tag=None,
                      stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None,
                      zookeeper_docker_compose_path=None, minio_certs_dir=None, use_keeper=True,
-                     main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, config_root_name="clickhouse"):
+                     main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, config_root_name="clickhouse", other_configs=[]):
 
         """Add an instance to the cluster.
 
@@ -814,6 +821,7 @@ class ClickHouseCluster:
             with_minio=with_minio,
             with_cassandra=with_cassandra,
             with_jdbc_bridge=with_jdbc_bridge,
+            with_hive = with_hive,
             server_bin_path=self.server_bin_path,
             odbc_bridge_bin_path=self.odbc_bridge_bin_path,
             library_bridge_bin_path=self.library_bridge_bin_path,
@@ -834,7 +842,8 @@ class ClickHouseCluster:
             copy_common_configs=copy_common_configs,
             external_dirs=external_dirs,
             tmpfs=tmpfs or [],
-            config_root_name=config_root_name)
+            config_root_name=config_root_name,
+            other_configs = other_configs)
 
         docker_compose_yml_dir = get_docker_compose_path()
 
@@ -927,6 +936,9 @@ class ClickHouseCluster:
         if with_jdbc_bridge and not self.with_jdbc_bridge:
             cmds.append(self.setup_jdbc_bridge_cmd(instance, env_variables, docker_compose_yml_dir))
 
+        if with_hive:
+            cmds.append(self.setup_hive(instance, env_variables, docker_compose_yml_dir))
+
         logging.debug("Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format(
             self.name, self.project_name, name, tag, self.base_cmd, docker_compose_yml_dir))
         return instance
@@ -1588,6 +1600,12 @@ class ClickHouseCluster:
                 self.up_called = True
                 time.sleep(10)
 
+            if self.with_hive and self.base_hive_cmd:
+                logging.debug('Setup hive')
+                subprocess_check_call(self.base_hive_cmd + common_opts)
+                self.up_called = True
+                time.sleep(300)
+
             if self.with_minio and self.base_minio_cmd:
                 # Copy minio certificates to minio/certs
                 os.mkdir(self.minio_dir)
@@ -1823,13 +1841,13 @@ class ClickHouseInstance:
             self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs,
             custom_dictionaries,
             macros, with_zookeeper, zookeeper_config_path, with_mysql_client, with_mysql, with_mysql8, with_mysql_cluster, with_kafka, with_kerberized_kafka,
-            with_rabbitmq, with_nginx, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_jdbc_bridge,
+            with_rabbitmq, with_nginx, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_jdbc_bridge, with_hive,
            with_cassandra, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, with_postgres, with_postgres_cluster,
             clickhouse_start_command=CLICKHOUSE_START_COMMAND,
             main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True,
             hostname=None, env_variables=None,
             image="clickhouse/integration-test", tag="latest",
-            stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None, config_root_name="clickhouse"):
+            stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, external_dirs=None, tmpfs=None, config_root_name="clickhouse", other_configs=[]):
 
         self.name = name
         self.base_cmd = cluster.base_cmd
@@ -1843,6 +1861,7 @@ class ClickHouseInstance:
         self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs]
         self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs]
         self.custom_dictionaries_paths = [p.abspath(p.join(base_path, c)) for c in custom_dictionaries]
+        self.other_custom_config_paths = [p.abspath(p.join(base_path, c)) for c in other_configs]
         self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None
         self.kerberos_secrets_dir = p.abspath(p.join(base_path, 'secrets'))
         self.macros = macros if macros is not None else {}
@@ -2376,6 +2395,8 @@ class ClickHouseInstance:
         os.mkdir(users_d_dir)
         dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries'))
         os.mkdir(dictionaries_dir)
+        other_conf_dir = p.abspath(p.join(instance_config_dir, 'other_conf.d'))
+        os.mkdir(other_conf_dir)
 
         def write_embedded_config(name, dest_dir, fix_log_level=False):
             with open(p.join(HELPERS_DIR, name), 'r') as f:
@@ -2422,6 +2443,8 @@ class ClickHouseInstance:
         # Copy dictionaries configs to configs/dictionaries
         for path in self.custom_dictionaries_paths:
             shutil.copy(path, dictionaries_dir)
+        for path in self.other_custom_config_paths:
+            shutil.copy(path, other_conf_dir)
 
         db_dir = p.abspath(p.join(self.path, 'database'))
         logging.debug(f"Setup database dir {db_dir}")
tests/integration/test_hive_query/__init__.py | 0 (new file)

tests/integration/test_hive_query/configs/config.xml | 22 (new file)
@@ -0,0 +1,22 @@
<clickhouse>
    <remote_servers >
        <!-- Test only shard config for testing distributed storage -->
        <simple>
            <shard>
                <replica>
                    <host>localhost</host>
                    <port>9000</port>
                </replica>
            </shard>
        </simple>
    </remote_servers>

    <local_cache_dir>/clickhouse_local_cache</local_cache_dir>
    <local_cache_quota>207374182400</local_cache_quota>
    <local_cache_max_threads>1000</local_cache_max_threads>

    <hdfs>
        <libhdfs3_conf>/etc/clickhouse-server/other_conf.d/hdfs-site.xml</libhdfs3_conf>
    </hdfs>

</clickhouse>
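With this server config the instance reads its libhdfs3 settings from other_conf.d/hdfs-site.xml (copied there via the new other_configs mechanism) and caches remote files under /clickhouse_local_cache. Once the cluster is up, the Hive engine tables can also be inspected interactively; a hedged sketch mirroring the queries used by the tests below, run inside the h0_0_0 instance after the tables have been created:

clickhouse-client --query "SELECT day, count(*) FROM default.demo_parquet GROUP BY day ORDER BY day"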
tests/integration/test_hive_query/configs/hdfs-site.xml | 6 (new file)
@@ -0,0 +1,6 @@
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
tests/integration/test_hive_query/test.py | 91 (new file)
@@ -0,0 +1,91 @@
import logging
import os

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster = ClickHouseCluster(__file__)
        cluster.add_instance('h0_0_0', main_configs=['configs/config.xml'], other_configs=[ 'configs/hdfs-site.xml'], with_hive=True)

        logging.info("Starting cluster ...")
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()

def test_create_parquet_table(started_cluster):
    logging.info('Start testing creating hive table ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day)
    """)
    logging.info("create result {}".format(result))

    assert result.strip() == ''

def test_create_orc_table(started_cluster):
    logging.info('Start testing creating hive table ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day)
    """)
    logging.info("create result {}".format(result))

    assert result.strip() == ''

def test_create_text_table(started_cluster):
    logging.info('Start testing creating hive table ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    CREATE TABLE default.demo_text (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_text') PARTITION BY (tuple())
    """)
    logging.info("create result {}".format(result))

    assert result.strip() == ''

def test_parquet_groupby(started_cluster):
    logging.info('Start testing groupby ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    SELECT day, count(*) FROM default.demo_parquet group by day order by day
    """)
    expected_result = """2021-11-01 1
2021-11-05 2
2021-11-11 1
2021-11-16 2
"""
    assert result == expected_result
def test_orc_groupby(started_cluster):
    logging.info('Start testing groupby ...')
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    SELECT day, count(*) FROM default.demo_orc group by day order by day
    """)
    expected_result = """2021-11-01 1
2021-11-05 2
2021-11-11 1
2021-11-16 2
"""
    assert result == expected_result

def test_text_count(started_cluster):
    node = started_cluster.instances['h0_0_0']
    result = node.query("""
    SELECT day, count(*) FROM default.demo_orc group by day order by day SETTINGS format_csv_delimiter = '\x01'
    """)
    expected_result = """2021-11-01 1
2021-11-05 2
2021-11-11 1
2021-11-16 2
"""
    assert result == expected_result
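To run the new suite locally, the usual ClickHouse integration-test entry points apply; the exact flags depend on your checkout and built binaries, so treat this as a sketch rather than the canonical invocation:

# via the dockerized runner (binary and config paths are placeholders)
cd tests/integration
./runner --binary /path/to/clickhouse --base-configs-dir /path/to/programs/server 'test_hive_query'

# or directly with pytest if the docker environment is already prepared
pytest -s test_hive_query/test.py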