Deploying Big Data on CentOS
Basic Installation and Configuration
#Install base packages
yum install wget
yum install vim
yum install rsync    #used to sync logs from remote servers to the local machine for testing
yum install sshpass  #used to sync logs from remote servers to the local machine for testing
#Turn off the firewall
#Check the firewall status: green "active (running)" text means the firewall is on
systemctl status firewalld.service
#Start the firewall
systemctl start firewalld.service
#Stop the firewall (it starts again automatically after a reboot)
systemctl stop firewalld.service
#Disable the firewall service
systemctl disable firewalld.service
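To confirm the firewall is really stopped and will stay off, a quick check (the exact wording of systemctl output varies by version):
#Should print "inactive" and "disabled" respectively
systemctl is-active firewalld.service
systemctl is-enabled firewalld.service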
Download the essential big-data software
Pay attention to the version compatibility between the JDK, Hadoop, and HBase.
Perform all operations as the root user.
Install Java
cd /opt
#Java 8: downloading from the Oracle website requires an Oracle account
tar -zxvf jdk-8u271-linux-x64.tar.gz -C /opt/
mv jdk1.8.0_271 jdk8
vim ~/.bashrc
#Append the following to the end of the file
export JAVA_HOME=/opt/jdk8
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
#Reload the environment variables
source ~/.bashrc
#Check the Java version
java -version
#Java 17: TODO, kept in reserve; Hadoop works best with Java 8
wget https://download.oracle.com/java/17/latest/jdk-17_linux-x64_bin.tar.gz
tar -zxvf jdk-17_linux-x64_bin.tar.gz -C /opt/
mv jdk-17.0.10 jdk17
Install Hadoop
#With too new a JDK, opening the HDFS web UI throws the error below; the fix only applies to JDK 11 and is unverified
#Failed to retrieve data from /webhdfs/v1?op=LISTSTATUS: Cannot invoke "com.sun.jersey.spi.container.WebApplication.isTracingEnabled()" because "wa" is null
#See https://blog.csdn.net/weixin_45688123/article/details/105107585
#Download from https://jar-download.com/?search_box=javax.activation
#Unpack and copy the jar into /opt/hadoop/share/hadoop/common
#hadoop-2.10.2
wget https://downloads.apache.org/hadoop/common/hadoop-2.10.2/hadoop-2.10.2.tar.gz
tar -zxvf hadoop-2.10.2.tar.gz -C /opt/
mv hadoop-2.10.2 hadoop
vim ~/.bashrc
#Append the following to the end of the file
export HADOOP_HOME=/opt/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH
#Reload the environment variables
source ~/.bashrc
#Check the Hadoop version
hadoop version
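Since HADOOP_COMMON_LIB_NATIVE_DIR and HADOOP_OPTS above point at the native libraries, it may be worth verifying they load (a quick check; warnings here are harmless for a test setup):
#List which native Hadoop libraries can be found
hadoop checknative -a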
#Configuring Hadoop takes a little patience
#Switch to the Hadoop configuration directory
cd /opt/hadoop/etc/hadoop
vim core-site.xml
#Fill in the configuration section as follows
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/opt/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
</configuration>
vim hdfs-site.xml
#Fill in the configuration section as follows
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/opt/hadoop/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/opt/hadoop/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
#A Hadoop cluster would need several more config files; since this is a single-node deployment, these two are enough
#Start it up
#Format the filesystem; this only needs to be done once
hdfs namenode -format
#Start HDFS; you will be prompted for a password and asked to answer "yes"
start-dfs.sh
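To get rid of those password prompts, passwordless SSH to localhost can be set up first; a minimal sketch, assuming the daemons run as root:
#Generate a passphrase-less key pair and authorize it for localhost logins
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
#Should now log in without a password (answer "yes" once to trust the host key)
ssh localhost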
#Check the running processes
jps
#At minimum the NameNode and DataNode should be present
18978 SecondaryNameNode
19126 Jps
18792 DataNode
18652 NameNode
#Stop HDFS; this will also prompt for a password unless passwordless SSH is set up
stop-dfs.sh
#Why is there no YARN process? start-dfs.sh only starts the HDFS daemons; this single-node setup never runs start-yarn.sh
# http://IP:50070/ is the NameNode web UI
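A quick way to confirm HDFS is usable is to create and list a directory (the path /user/test is just an example):
hdfs dfs -mkdir -p /user/test
hdfs dfs -ls /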
Install HBase
#wget may report an expired-certificate error; add "--no-check-certificate" to work around it
#hbase-2.3.7
wget https://archive.apache.org/dist/hbase/2.3.7/hbase-2.3.7-bin.tar.gz
tar -zxvf hbase-2.3.7-bin.tar.gz -C /opt/
mv hbase-2.3.7 hbase
vim ~/.bashrc
#Append the following to the end of the file
export HBASE_HOME=/opt/hbase
export PATH=${HBASE_HOME}/bin:$PATH
#Reload the environment variables
source ~/.bashrc
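HBase reads JAVA_HOME from conf/hbase-env.sh; if startup complains that Java cannot be found, point it at the JDK installed above (a sketch):
echo "export JAVA_HOME=/opt/jdk8" >> /opt/hbase/conf/hbase-env.sh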
#Start it up
start-hbase.sh
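To verify HBase came up, the shell's status command is a simple check:
hbase shell
#inside the shell:
status
exit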
Install Hive
#hive-2.3.9
wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz
tar -zxvf apache-hive-2.3.9-bin.tar.gz -C /opt/
mv apache-hive-2.3.9-bin hive
vim ~/.bashrc
#Append the following to the end of the file
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
#Reload the environment variables
source ~/.bashrc
#Have the JDBC driver ready: mysql-connector-j-8.3.0.jar
#Copy the MySQL JDBC driver into Hive's lib directory
cp mysql-connector-j-8.3.0.jar hive/lib/
cd /opt/hive/conf/
#Create the file; note that in XML a literal & must be written as &amp;
vim hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- JDBC connection URL -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.1.33:3306/big_data?serverTimezone=GMT%2B8&amp;characterEncoding=utf8&amp;useSSL=true</value>
  </property>
  <!-- JDBC driver -->
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>
  <!-- JDBC username -->
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <!-- JDBC password -->
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>feel2008</value>
  </property>
  <!-- Hive's default working directory on HDFS -->
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/opt/hive/warehouse</value>
  </property>
</configuration>
#Initialize the metastore database
schematool -dbType mysql -initSchema -verbose
#Start it up
hive
show databases;
show tables;
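A non-interactive smoke test can also be run straight from the shell (the table name smoke_test is arbitrary):
hive -e "CREATE TABLE smoke_test (id INT); SHOW TABLES; DROP TABLE smoke_test;"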
(Screenshot: output of a successful metastore initialization)
(Screenshot: output of a successful hive startup)
Install Elasticsearch
#elasticsearch-8.12.2
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.12.2-linux-x86_64.tar.gz
tar -zxvf elasticsearch-8.12.2-linux-x86_64.tar.gz -C /opt/
mv elasticsearch-8.12.2 elasticsearch
#Elasticsearch refuses to run as root, so create a dedicated user
groupadd esgroup
useradd -g esgroup esuser
echo 123456 | passwd --stdin esuser   #useradd -p expects a pre-encrypted hash, so set the password via passwd instead
chown -R esuser:esgroup /opt/elasticsearch
vim /opt/elasticsearch/config/elasticsearch.yml
#Append to the end; this allows cross-origin requests (needed by elasticsearch-head)
http.cors.enabled: true
http.cors.allow-origin: "*"
#2024-03-20: turn off SSL on the HTTP layer to skip the extra SSL setup
xpack.security.http.ssl.enabled: false
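Elasticsearch often fails its bootstrap checks on a stock CentOS machine; raising the kernel and file-descriptor limits beforehand usually avoids that (a sketch; tune the numbers for your box):
#Raise the mmap count (and persist it) plus the open-file limit for esuser
sysctl -w vm.max_map_count=262144
echo "vm.max_map_count=262144" >> /etc/sysctl.conf
echo "esuser - nofile 65535" >> /etc/security/limits.conf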
#A local copy of the elasticsearch-head web UI is also needed
#Download
wget https://codeload.github.com/mobz/elasticsearch-head/zip/refs/heads/master
#Unzip and enter the directory
unzip master
cd elasticsearch-head-master
npm install
npm run start
#Web UI (served over plain http, not https)
http://127.0.0.1:9100
#elasticsearch-head login URI
http://192.168.1.74:9200/?auth_user=elastic&auth_password=nsQywcgUoC3ZljMJJ0p1
su esuser
#Start it up
cd /opt/elasticsearch/bin
./elasticsearch
#Run in the background
./elasticsearch -d
#Note: ES 8 serves HTTPS by default; with xpack.security.http.ssl.enabled set to false above, use plain http instead
http://IP:9200
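With SSL disabled above, a curl against the HTTP endpoint is an easy liveness check (substitute your own elastic password):
curl -u elastic:nsQywcgUoC3ZljMJJ0p1 http://127.0.0.1:9200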
Save the credentials printed after a successful first start:
"Password for the elastic user (reset with bin/elasticsearch-reset-password -u elastic)" is the password of the built-in elastic user; it can be reset with bin/elasticsearch-reset-password -u elastic.
"Configure Kibana to use this cluster" is the enrollment token Kibana uses to connect to ES over SSL; it is valid for 30 minutes, after which a new one can be created with bin/elasticsearch-create-enrollment-token -s kibana.
Account: elastic
Password: nsQywcgUoC3ZljMJJ0p1
(Screenshot: output after a successful login)
(Screenshot: the front-end UI after a successful login)
Install Logstash
The Logstash version must match the Elasticsearch version, or the resulting errors will be baffling. See https://www.elastic.co/cn/support/matrix#matrix_compatibility
#logstash-8.12.2
wget https://artifacts.elastic.co/downloads/logstash/logstash-8.12.2-linux-x86_64.tar.gz
tar -zxvf logstash-8.12.2-linux-x86_64.tar.gz -C /opt/
mv logstash-8.12.2 logstash
vim /opt/logstash/config/logstash.yml
#Append to the end so the Logstash HTTP API listens on all interfaces (note: straight quotes, not curly ones)
http.host: "0.0.0.0"
#Pipeline configuration file
vim myconf.conf
input {
  file {
    path => "/opt/logs/*.log"
    start_position => "beginning"            # read files from the beginning (default is "end")
    sincedb_path => "/path/to/sincedb_file"  # where Logstash records the last position read in each file
    ignore_older => 0                        # skip files older than this many seconds; 0 means no files are skipped
    discover_interval => 15                  # how often (in seconds) to look for new files; optional
  }
}
filter {
  if [message] =~ "AXBBackWordReq" {
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:timestamp},%{NUMBER} - %{WORD:action}\(sessionid=%{DATA:sessionid}, callerNumber=%{DATA:callerNumber}, calledNumber=%{DATA:calledNumber}, extNumber=%{DATA:extNumber}, dialTime=%{NUMBER:dialTime}\)" }
    }
  }
  else if [message] =~ "AXBBackWordResp" {
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:timestamp},%{NUMBER} - %{WORD:action}\(sessionid=%{DATA:sessionid}, code=%{NUMBER:code}, calledNumber=%{DATA:calledNumber}, userData=%{DATA:userData}, displayNumber=%{DATA:displayNumber}\)" }
    }
  }
}
output {
  elasticsearch {
    hosts => ["http://192.168.1.74:9200"]
    user => "elastic"
    password => "nsQywcgUoC3ZljMJJ0p1"
    index => "my_index-%{+YYYY.MM.dd}"  # dynamic index name that includes the date
    manage_template => false            # keep Logstash from managing index templates itself
    template => "myTemplate.json"       # custom template file (Logstash only applies it when manage_template is true; otherwise install it by hand, see below)
  }
}
vim myTemplate.json
#JSON does not allow comments, so copy this verbatim (note: index_patterns must match the index name set in the Logstash output, e.g. my_index-*, for the template to apply)
{
  "index_patterns": ["axblogstash-*"],
  "mappings": {
    "properties": {
      "timestamp": { "type": "date" },
      "action": { "type": "keyword" },
      "sessionid": { "type": "keyword" },
      "callerNumber": { "type": "keyword" },
      "calledNumber": { "type": "keyword" },
      "extNumber": { "type": "keyword" },
      "dialTime": { "type": "long" },
      "code": { "type": "integer" },
      "userData": { "type": "text", "analyzer": "standard" },
      "displayNumber": { "type": "keyword" }
    }
  },
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  }
}
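Because manage_template is false in the output above, Logstash will not upload this file itself. One way to install it by hand is the legacy _template API, which ES 8 still accepts (though deprecated) and which matches this file's flat layout (a sketch):
curl -u elastic:nsQywcgUoC3ZljMJJ0p1 -X PUT "http://192.168.1.74:9200/_template/axblogstash" -H 'Content-Type: application/json' -d @myTemplate.json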
#Start a quick stdin/stdout test
./bin/logstash -e 'input {stdin {}} output{stdout{}}'
#Check the config file syntax
./bin/logstash -f myconf.conf -t
#Start with the config file
./bin/logstash -f myconf.conf
#URL for checking that Logstash is up
http://192.168.1.74:9600/
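To see the pipeline in action, a line matching the AXBBackWordReq grok pattern can be appended to a watched file (all field values below are made up):
echo '2024-03-20T10:00:00,123 - AXBBackWordReq(sessionid=s001, callerNumber=13800000000, calledNumber=13900000000, extNumber=1001, dialTime=1710900000)' >> /opt/logs/test.log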
(Screenshot: response returned when the URL above is reachable)
Install Kibana
The Kibana version must also match the Elasticsearch version, or the errors will be just as baffling.
#kibana-8.12.2
wget https://artifacts.elastic.co/downloads/kibana/kibana-8.12.2-linux-x86_64.tar.gz
tar -zxvf kibana-8.12.2-linux-x86_64.tar.gz -C /opt/
mv kibana-8.12.2 kibana
#Kibana must also run as a non-root user; reuse the esuser created for Elasticsearch
chown -R esuser:esgroup /opt/kibana
#The password of the built-in kibana_system user must be set in ES first; this run printed the password =NXKu=j*_YSkBkBYkovr
/opt/elasticsearch/bin/elasticsearch-reset-password -u kibana_system
#Add the following to config/kibana.yml
server.port: 5601
server.host: "0.0.0.0"
elasticsearch.hosts: ["http://192.168.1.74:9200"]
elasticsearch.username: "kibana_system"
elasticsearch.password: "=NXKu=j*_YSkBkBYkovr"
i18n.locale: "zh-CN"
#Start Kibana
su esuser
cd /opt/kibana
./bin/kibana
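To keep Kibana running after the terminal closes, it can also be started in the background (the log path is an arbitrary choice):
nohup ./bin/kibana > /opt/kibana/kibana.log 2>&1 &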
http://192.168.1.74:5601/
#Logging in requires the account and password
**Account: elastic**
**Password: nsQywcgUoC3ZljMJJ0p1**