下载地址
Kettle 是一个开源的数据集成工具,它是 Pentaho Data Integration(PDI)项目的一部分。要访问 Kettle 的官方网站,可以通过访问其母公司 Hitachi Vantara 的网站来找到相关信息
官方网站:https://www.hitachivantara.com/en-us/products/data-management-analytics.html
下载地址:https://www.hitachivantara.com/en-us/products/pentaho-plus-platform/data-integration-analytics/pentaho-community-edition.html
安装
解压
windows
和Linux
安装包是一样的,解压命令: unzip pentaho-server-ce-9.4.0.0-343.zip
数据库初始化
- 选择mysql作为存储库,找到对应的sql初始化文件,自行初始化(数据库安装略)
- 提供的脚本编码和密码按需要进行修改
SQL
附件参考-- jcr CREATE DATABASE IF NOT EXISTS `jackrabbit` DEFAULT CHARACTER SET utf8; CREATE USER 'jcr_user'@'%' identified by '!QAZ2wsx'; GRANT ALL PRIVILEGES ON jackrabbit.* TO 'jcr_user'@'%' WITH GRANT OPTION; -- quartz CREATE DATABASE IF NOT EXISTS `quartz` DEFAULT CHARACTER SET utf8; CREATE USER 'pentaho_user'@'%' identified by '!QAZ2wsx'; GRANT ALL PRIVILEGES ON quartz.* to 'pentaho_user'@'%' WITH GRANT OPTION; USE `quartz`; DROP TABLE IF EXISTS QRTZ5_JOB_LISTENERS; DROP TABLE IF EXISTS QRTZ5_TRIGGER_LISTENERS; DROP TABLE IF EXISTS QRTZ5_FIRED_TRIGGERS; DROP TABLE IF EXISTS QRTZ5_PAUSED_TRIGGER_GRPS; DROP TABLE IF EXISTS QRTZ5_SCHEDULER_STATE; DROP TABLE IF EXISTS QRTZ5_LOCKS; DROP TABLE IF EXISTS QRTZ5_SIMPLE_TRIGGERS; DROP TABLE IF EXISTS QRTZ5_CRON_TRIGGERS; DROP TABLE IF EXISTS QRTZ5_BLOB_TRIGGERS; DROP TABLE IF EXISTS QRTZ5_TRIGGERS; DROP TABLE IF EXISTS QRTZ5_JOB_DETAILS; DROP TABLE IF EXISTS QRTZ5_CALENDARS; CREATE TABLE QRTZ5_JOB_DETAILS ( JOB_NAME VARCHAR(200) NOT NULL, JOB_GROUP VARCHAR(200) NOT NULL, DESCRIPTION VARCHAR(250) NULL, JOB_CLASS_NAME VARCHAR(250) NOT NULL, IS_DURABLE VARCHAR(1) NOT NULL, IS_VOLATILE VARCHAR(1) NOT NULL, IS_STATEFUL VARCHAR(1) NOT NULL, REQUESTS_RECOVERY VARCHAR(1) NOT NULL, JOB_DATA BLOB NULL, PRIMARY KEY (JOB_NAME,JOB_GROUP) ); CREATE TABLE QRTZ5_JOB_LISTENERS ( JOB_NAME VARCHAR(200) NOT NULL, JOB_GROUP VARCHAR(200) NOT NULL, JOB_LISTENER VARCHAR(200) NOT NULL, PRIMARY KEY (JOB_NAME,JOB_GROUP,JOB_LISTENER), FOREIGN KEY (JOB_NAME,JOB_GROUP) REFERENCES QRTZ5_JOB_DETAILS(JOB_NAME,JOB_GROUP) ); CREATE TABLE QRTZ5_TRIGGERS ( TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, JOB_NAME VARCHAR(200) NOT NULL, JOB_GROUP VARCHAR(200) NOT NULL, IS_VOLATILE VARCHAR(1) NOT NULL, DESCRIPTION VARCHAR(250) NULL, NEXT_FIRE_TIME BIGINT(13) NULL, PREV_FIRE_TIME BIGINT(13) NULL, PRIORITY INTEGER NULL, TRIGGER_STATE VARCHAR(16) NOT NULL, TRIGGER_TYPE VARCHAR(8) NOT NULL, START_TIME BIGINT(13) NOT NULL, END_TIME BIGINT(13) NULL, CALENDAR_NAME VARCHAR(200) NULL, MISFIRE_INSTR SMALLINT(2) NULL, JOB_DATA BLOB NULL, PRIMARY KEY (TRIGGER_NAME,TRIGGER_GROUP), FOREIGN KEY (JOB_NAME,JOB_GROUP) REFERENCES QRTZ5_JOB_DETAILS(JOB_NAME,JOB_GROUP) ); CREATE TABLE QRTZ5_SIMPLE_TRIGGERS ( TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, REPEAT_COUNT BIGINT(7) NOT NULL, REPEAT_INTERVAL BIGINT(12) NOT NULL, TIMES_TRIGGERED BIGINT(10) NOT NULL, PRIMARY KEY (TRIGGER_NAME,TRIGGER_GROUP), FOREIGN KEY (TRIGGER_NAME,TRIGGER_GROUP) REFERENCES QRTZ5_TRIGGERS(TRIGGER_NAME,TRIGGER_GROUP) ); CREATE TABLE QRTZ5_CRON_TRIGGERS ( TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, CRON_EXPRESSION VARCHAR(200) NOT NULL, TIME_ZONE_ID VARCHAR(80), PRIMARY KEY (TRIGGER_NAME,TRIGGER_GROUP), FOREIGN KEY (TRIGGER_NAME,TRIGGER_GROUP) REFERENCES QRTZ5_TRIGGERS(TRIGGER_NAME,TRIGGER_GROUP) ); CREATE TABLE QRTZ5_BLOB_TRIGGERS ( TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, BLOB_DATA BLOB NULL, PRIMARY KEY (TRIGGER_NAME,TRIGGER_GROUP), FOREIGN KEY (TRIGGER_NAME,TRIGGER_GROUP) REFERENCES QRTZ5_TRIGGERS(TRIGGER_NAME,TRIGGER_GROUP) ); CREATE TABLE QRTZ5_TRIGGER_LISTENERS ( TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, TRIGGER_LISTENER VARCHAR(200) NOT NULL, PRIMARY KEY (TRIGGER_NAME,TRIGGER_GROUP,TRIGGER_LISTENER), FOREIGN KEY (TRIGGER_NAME,TRIGGER_GROUP) REFERENCES QRTZ5_TRIGGERS(TRIGGER_NAME,TRIGGER_GROUP) ); CREATE TABLE QRTZ5_CALENDARS ( CALENDAR_NAME VARCHAR(200) NOT NULL, CALENDAR BLOB NOT NULL, PRIMARY KEY (CALENDAR_NAME) ); CREATE TABLE QRTZ5_PAUSED_TRIGGER_GRPS ( TRIGGER_GROUP VARCHAR(200) NOT NULL, PRIMARY KEY (TRIGGER_GROUP) ); CREATE TABLE QRTZ5_FIRED_TRIGGERS ( ENTRY_ID VARCHAR(95) NOT NULL, TRIGGER_NAME VARCHAR(200) NOT NULL, TRIGGER_GROUP VARCHAR(200) NOT NULL, IS_VOLATILE VARCHAR(1) NOT NULL, INSTANCE_NAME VARCHAR(200) NOT NULL, FIRED_TIME BIGINT(13) NOT NULL, PRIORITY INTEGER NOT NULL, STATE VARCHAR(16) NOT NULL, JOB_NAME VARCHAR(200) NULL, JOB_GROUP VARCHAR(200) NULL, IS_STATEFUL VARCHAR(1) NULL, REQUESTS_RECOVERY VARCHAR(1) NULL, PRIMARY KEY (ENTRY_ID) ); CREATE TABLE QRTZ5_SCHEDULER_STATE ( INSTANCE_NAME VARCHAR(200) NOT NULL, LAST_CHECKIN_TIME BIGINT(13) NOT NULL, CHECKIN_INTERVAL BIGINT(13) NOT NULL, PRIMARY KEY (INSTANCE_NAME) ); CREATE TABLE QRTZ5_LOCKS ( LOCK_NAME VARCHAR(40) NOT NULL, PRIMARY KEY (LOCK_NAME) ); INSERT INTO QRTZ5_LOCKS values('TRIGGER_ACCESS'); INSERT INTO QRTZ5_LOCKS values('JOB_ACCESS'); INSERT INTO QRTZ5_LOCKS values('CALENDAR_ACCESS'); INSERT INTO QRTZ5_LOCKS values('STATE_ACCESS'); INSERT INTO QRTZ5_LOCKS values('MISFIRE_ACCESS'); CREATE DATABASE IF NOT EXISTS `hibernate` DEFAULT CHARACTER SET utf8; -- repository USE hibernate; CREATE USER 'hibuser'@'%' identified by '!QAZ2wsx'; GRANT ALL PRIVILEGES ON hibernate.* TO 'hibuser'@'%' WITH GRANT OPTION; FLUSH PRIVILEGES;
配置文件修改
基本上都是配置Mysql数据库信息
pentaho-server/pentaho-solutions/system/quartz/quartz.properties
#org.quartz.jobStore.driverDelegateClass = org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
org.quartz.jobStore.driverDelegateClass = org.quartz.impl.jdbcjobstore.StdJDBCDelegate
pentaho-server/pentaho-solutions/system/applicationContext-spring-security-jdbc.properties
datasource.driver.classname=com.mysql.jdbc.Driver
datasource.url=jdbc:mysql://xxxx:3306/hibernate
datasource.username=hibuser
datasource.password=!QAZ2wsx
datasource.validation.query=SELECT 1
datasource.pool.max.wait=-1
datasource.pool.max.active=8
pentaho-server/pentaho-solutions/system/simple-jndi/jdbc.properties
SampleData/type=javax.sql.DataSource
SampleData/driver=com.mysql.cj.jdbc.Driver
SampleData/url.cj.jdbc:mysql://xxxx:3306/hibernate
SampleData/user=hibuser
SampleData/password=!QAZ2wsx
Hibernate/type=javax.sql.DataSource
Hibernate/driver=com.mysql.cj.jdbc.Driver
Hibernate/url.cj.jdbc:mysql://xxxx:3306/hibernate
Hibernate/user=hibuser
Hibernate/password=!QAZ2wsx
Quartz/type=javax.sql.DataSource
Quartz/driver=com.mysql.cj.jdbc.Driver
Quartz/url.cj.jdbc:mysql://xxxx:3306/quartz
Quartz/user=pentaho_user
Quartz/password=!QAZ2wsx
Shark/type=javax.sql.DataSource
Shark/driver=com.mysql.cj.jdbc.Driver
Shark/url.cj.jdbc:mysql://xxxx:3306/hbibernate
Shark/user=hibuser
Shark/password=!QAZ2wsx
SampleDataAdmin/type=javax.sql.DataSource
SampleDataAdmin/driver=com.mysql.cj.jdbc.Driver
SampleDataAdmin/url.cj.jdbc:mysql://xxxx:3306/hibernate
SampleDataAdmin/user=hibuser
SampleDataAdmin/password=!QAZ2wsx
pentaho-server/pentaho-solutions/system/applicationContext-spring-security-hibernate.properties
jdbc.driver=com.mysql.cj.jdbc.Driver
jdbc.url=jdbc:mysql://xxx:3306/hibernate
jdbc.username=hibuser
jdbc.password=!QAZ2wsx
hibernate.dialect=org.hibernate.dialect.MySQL5Dialect
还有一处需要修改dialects/mysql5/applicationContext-spring-security-hibernate.properties
pentaho-server/pentaho-solutions/system/hibernate/hibernate-settings.xml
<config-file>system/hibernate/mysql5.hibernate.cfg.xml</config-file>
pentaho-server/pentaho-solutions/system/hibernate/mysql5.hibernate.cfg.xml
<property name="connection.driver_class">com.mysql.cj.jdbc.Driver</property>
<property name="connection.url">jdbc:mysql://xxxx:3306/hibernate</property>
<property name="dialect">org.hibernate.dialect.MySQL5InnoDBDialect</property>
<property name="connection.username">hibuser</property>
<property name="connection.password">!QAZ2wsx</property>
<property name="connection.pool_size">10</property>
<property name="show_sql">false</property>
<property name="hibernate.jdbc.use_streams_for_binary">true</property>
替换audit_sql.xml
pentaho-server/pentaho-solutions/system/jackrabbit/repository.xml
要改的地方有点多
<!--
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
-->
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="url" value="jdbc:mysql://xxxx:3306/jackrabbit"/>
<param name="user" value="jcr_user"/>
<param name="password" value="!QAZ2wsx"/>
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="fs_repos_"/>
</FileSystem>
<!-- <DataStore class="org.apache.jackrabbit.core.data.FileDataStore"/>-->
<DataStore class="org.apache.jackrabbit.core.data.db.DbDataStore">
<param name="url" value="jdbc:mysql://xxxx:3306/jackrabbit"/>
<param name="user" value="jcr_user"/>
<param name="password" value="!QAZ2wsx"/>
<param name="databaseType" value="mysql"/>
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="minRecordLength" value="1024"/>
<param name="maxConnections" value="3"/>
<param name="copyWhenReading" value="true"/>
<param name="tablePrefix" value=""/>
<param name="schemaObjectPrefix" value="ds_repos_"/>
</DataStore>
<!-- <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/version" /> -->
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="url" value="jdbc:mysql://xxxx:3306/jackrabbit"/>
<param name="user" value="jcr_user"/>
<param name="password" value="!QAZ2wsx"/>
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="fs_ws_"/>
</FileSystem>
<!--<PersistenceManager class="org.apache.jackrabbit.core.persistence.pool.H2PersistenceManager">
<param name="url" value="jdbc:h2:${wsp.home}/db"/>
<param name="schemaObjectPrefix" value="${wsp.name}_"/>
</PersistenceManager>-->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.MySqlPersistenceManager">
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="url" value="jdbc:mysql://221.221.221.7:3306/jackrabbit"/>
<param name="user" value="jcr_user" />
<param name="password" value="!QAZ2wsx" />
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="${wsp.name}_pm_ws_"/>
</PersistenceManager>
<!--<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>-->
<FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="url" value="jdbc:mysql://xxxx:3306/jackrabbit"/>
<param name="user" value="jcr_user"/>
<param name="password" value="!QAZ2wsx"/>
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="fs_ver_"/>
</FileSystem>
<!--<PersistenceManager class="org.apache.jackrabbit.core.persistence.pool.H2PersistenceManager">
<param name="url" value="jdbc:h2:${rep.home}/version/db"/>
<param name="schemaObjectPrefix" value="version_"/>
</PersistenceManager>-->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.MySqlPersistenceManager">
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="url" value="jdbc:mysql://221.221.221.7:3306/jackrabbit"/>
<param name="user" value="jcr_user" />
<param name="password" value="!QAZ2wsx" />
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="pm_ver_"/>
</PersistenceManager>
<!-- <Journal class="org.apache.jackrabbit.core.journal.MemoryJournal"/> -->
<Journal class="org.apache.jackrabbit.core.journal.DatabaseJournal">
<param name="revision" value="${rep.home}/revision.log"/>
<param name="url" value="jdbc:mysql://221.221.221.7:3306/jackrabbit"/>
<param name="driver" value="com.mysql.cj.jdbc.Driver"/>
<param name="user" value="jcr_user"/>
<param name="password" value="!QAZ2wsx"/>
<param name="schema" value="mysql"/>
<param name="databaseType" value="mysql"/>
<param name="janitorEnabled" value="true"/>
<param name="janitorSleep" value="86400"/>
<param name="janitorFirstRunHourOfDay" value="3"/>
</Journal>
tomcat
-
替换
pentaho-server/tomcat/lib
下面的mysql
驱动包
-
修改配置文件
pentaho-server/tomcat/webapps/pentaho/META-INF/context.xml
<?xml version="1.0" encoding="UTF-8"?> <Context path="/pentaho" docbase="webapps/pentaho/"> <Resource validationQuery="select 1" url="jdbc:mysql://xxxx:3306/hibernate" driverClassName="com.mysql.cj.jdbc.Driver" password="!QAZ2wsx" username="hibuser" initialSize="0" maxActive="20" maxIdle="10" maxWait="10000" factory="org.apache.commons.dbcp.BasicDataSourceFactory" type="javax.sql.DataSource" auth="Container" name="jdbc/Hibernate"/> <Resource validationQuery="select 1" url="jdbc:mysql://xxxx:3306/quartz" driverClassName="com.mysql.cj.jdbc.Driver" password="!QAZ2wsx" username="pentaho_user" testOnBorrow="true" initialSize="0" maxActive="20" maxIdle="10" maxWait="10000" factory="org.apache.commons.dbcp.BasicDataSourceFactory" type="javax.sql.DataSource" auth="Container" name="jdbc/Quartz"/> <Resource name="jdbc/jackrabbit" auth="Container" type="javax.sql.DataSource" factory="org.apache.commons.dbcp.BasicDataSourceFactory" maxActive="20" minIdle="0" maxIdle="5" initialSize="0" maxWait="10000" username="jcr_user" password="!QAZ2wsx" driverClassName="com.mysql.cj.jdbc.Driver" url="jdbc:mysql://xxxx:3306/jackrabbit" validationQuery="select 1"/> </Context>
-
注释
pentaho-server/tomcat/webapps/pentaho/WEB-INF/web.xml
相关配置
搜索关键字BEGIN HSQLDB DATABASES
搜索关键字BEGIN HSQLDB STARTER
pentaho-server/pentaho-solutions/system/systemListeners.xml
启动服务
./start-pentaho.sh
登录控制台
客户端连接
- 本地直接解压上面下载的客户端安装包,启动
spoon
问题排查
遇到的问题就两种,一是端口冲突,二是文件配置不对(少个标签呀、密码不对呀),根据日志直接排查即可
查看日志:pentaho-server/tomcat/logs
禁用H2
启动报H2端口冲突错,即使你已经指定了 MySQL 数据库作为 Pentaho BI Server 的数据源,H2 数据库服务器仍然会尝试启动。这是因为 H2 数据库用于存储 Pentaho 系统的一些内部信息和配置。
要解决这个问题,你可以选择以下两种方法之一:
- 更改 H2 数据库服务器的端口:
如上一个回答所述,你可以修改GettingStartedDB-spring.xml
文件中的端口号,将 H2 数据库服务器绑定到一个新的、未被使用的端口。 - 禁用 H2 数据库服务器:
如果你确定不需要使用 H2 数据库服务器,可以将其完全禁用。为此,请在GettingStartedDB-spring.xml
文件中找到h2Server
bean 的定义,并将<bean>
标签的init-method="start"
属性更改为init-method=""
。这将阻止 Spring 在初始化上下文时启动 H2 数据库服务器。
请注意,在进行任何更改后,都需要重新启动 Pentaho BI Server 以使更改生效。
<constructor-arg>
<array value-type="java.lang.String">
<value>-tcp</value>
<value>-tcpAllowOthers</value>
<value>-tcpPort</value>
<value>9792</value>
</array>
</constructor-arg>
其它
-
只是为了个人了解,直接本地解压启动即可,无需任何配置
- 服务端
- 客户端
具体使用,正在研究中…
=======================================================================
20231206更新: pdi-ce-9.4.0.0-343.zip和pentaho-server-ce-9.4.0.0-343.zip区别及简单使用
- 服务端