单机安装
- yum install yum-utils
- rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
- yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
- sudo yum install clickhouse-server clickhouse-client
配置文件
vim /etc/clickhouse-server/config.xml
<!-- Cluster definition: three shards, one replica per shard. -->
<remote_servers>
    <clickhouse_3shards_1replicas>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.153</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <!-- internal_replication is a shard-level setting: it belongs next to <replica>, not inside it -->
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.154</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.155</host>
                <!-- NOTE(review): the other two shards use port 9000; confirm this server really listens on 9001 -->
                <port>9001</port>
            </replica>
        </shard>
    </clickhouse_3shards_1replicas>
</remote_servers>
<!-- ZooKeeper endpoints: three nodes on the same host (10.0.5.37), ports 2181-2183. -->
<zookeeper>
    <node>
        <host>10.0.5.37</host>
        <port>2181</port>
    </node>
    <node>
        <host>10.0.5.37</host>
        <port>2182</port>
    </node>
    <node>
        <host>10.0.5.37</host>
        <port>2183</port>
    </node>
</zookeeper>
<!-- Per-server macro values. The three blocks below are alternatives, one per server: -->
<!-- a given config.xml should keep only the block for the server it is installed on. -->
<!-- The replica values match the shard hosts listed in <remote_servers>. -->
<!-- Server 10.0.5.153 (shard 01) -->
<macros>
<shard>01</shard>
<replica>10.0.5.153</replica>
</macros>
<!-- Server 10.0.5.154 (shard 02) -->
<macros>
<shard>02</shard>
<replica>10.0.5.154</replica>
</macros>
<!-- Server 10.0.5.155 (shard 03) -->
<macros>
<shard>03</shard>
<replica>10.0.5.155</replica>
</macros>
使用scp命令将配置文件传到另外两台机器上(传完后需要把每台机器上的 <macros> 改成该机器自己的 shard/replica 值):
scp /etc/clickhouse-server/config.xml root@10.0.5.153:/etc/clickhouse-server/config.xml
scp /etc/clickhouse-server/config.xml root@10.0.5.154:/etc/clickhouse-server/config.xml
clickhouse安装位置
- 配置文件: /etc/clickhouse-server/config.xml
- data存储: /var/lib/clickhouse/
开启zk集群
这个参考我之前发布的下载资源,windows下直接使用cmd即可打开zk集群服务。
启动Server(有点坑)
- 指定配置文件启动时,如果当前是 root 账号,要使用 sudo -u clickhouse /usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml,否则会提示 Application: DB::Exception: Effective user of the process (root) does not match the owner of the data (clickhouse). Run under 'sudo -u clickhouse'。如果第一次就直接用 root 账号指定配置文件启动(即启动方式错了),之后再用 systemctl 也无法启动。
这时需要把相关目录的属主和属组改回 clickhouse:
- chgrp clickhouse clickhouse -R
- chgrp clickhouse /var/lib/clickhouse -R
- chown clickhouse /var/lib/clickhouse -R
- chown clickhouse /var/log/clickhouse-server -R
- chgrp clickhouse /var/log/clickhouse-server -R
客户端连接
# -m是多行模式,分号+回车执行当前语句。
clickhouse-client --port=xxx --host=xxx --user=xxx --password=xxx -m
查看集群是否搭建成功:在客户端执行 select * from system.clusters; 如果能看到 clickhouse_3shards_1replicas 及其三个分片,说明集群配置已生效。
基础SQL
-- Create the database on every node of the cluster.
CREATE DATABASE a9_mixed ON CLUSTER clickhouse_3shards_1replicas;
-- Create a Distributed table on top of a9_mixed.t_log_goods, a local table that
-- already exists (with data) on the servers. The AS clause copies the column
-- list from the local table; the square brackets in the earlier version were
-- grammar notation and are not valid SQL.
-- NOTE(review): no sharding key is passed to Distributed(), as in the original
-- statement; confirm whether this table is only meant to be read from.
CREATE TABLE a9_mixed.t_log_goods_all ON CLUSTER clickhouse_3shards_1replicas
AS a9_mixed.t_log_goods
ENGINE = Distributed(clickhouse_3shards_1replicas, a9_mixed, t_log_goods);
-- SHOW CREATE TABLE for a9_mixed.t_log_goods_all returns:
CREATE TABLE a9_mixed.t_log_goods_all
(
    `val` Int32,
    `new` Int32,
    `flag` Int32,
    `rname` String,
    `optime` Int32,
    `level` Int32,
    `goodsid` Int32,
    `old` Int32,
    `rid` Int64,
    `platform` Int32,
    `sid` Int32,
    `quality` Int32,
    `uid` Int32,
    `guid` Int64,
    `id` Int32,
    `sdk` String,
    `power` Int64,
    `seri` Int32,
    `goodstype` Int32
)
ENGINE = Distributed('clickhouse_3shards_1replicas', 'a9_mixed', 't_log_goods');
-- Re-running the statement above as-is (it has no ON CLUSTER clause) creates
-- t_log_goods_all only on the server that executes it; the other servers do
-- not get the table.
-- Drop the Distributed table on every node of the cluster.
DROP TABLE t_log_goods_all ON CLUSTER clickhouse_3shards_1replicas;
-- Create a local table on every node, then a Distributed table that maps onto it.
-- The Distributed engine below reads from database test01, so every object is
-- qualified with test01 explicitly; the statements then no longer depend on
-- which database the client session happens to be using.
CREATE DATABASE IF NOT EXISTS test01 ON CLUSTER clickhouse_3shards_1replicas;

-- Local table, partitioned by the month of EventDate.
CREATE TABLE test01.t_local ON CLUSTER clickhouse_3shards_1replicas
(
    EventDate DateTime,
    CounterID UInt32,
    UserID UInt32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);

-- Matching Distributed table; CounterID is passed as the sharding key.
CREATE TABLE test01.t_logical_Distributed ON CLUSTER clickhouse_3shards_1replicas
(
    EventDate DateTime,
    CounterID UInt32,
    UserID UInt32
)
ENGINE = Distributed(clickhouse_3shards_1replicas, test01, t_local, CounterID);

-- Write through the Distributed table; it forwards the rows to the local
-- tables on the shards. The column list is explicit so the statement keeps
-- working if the table definition later gains or reorders columns.
INSERT INTO test01.t_logical_Distributed (EventDate, CounterID, UserID)
VALUES
    ('2021-01-16 00:00:00', 5, 5),
    ('2021-02-10 00:00:00', 6, 6),
    ('2021-03-10 00:00:00', 4, 4);