DB2 Installation Steps
################################## DB2 Installation ###########################
## Pre-installation setup ##
# Disable address space layout randomization
vi /etc/sysctl.conf
# add the line:
kernel.randomize_va_space=0
sysctl -p

## Upload the package to /home/db2 and extract it
cd /home/db2
#tar -xvf v9.7fp9_linuxx64_server.tar.gz
tar -xvf v10.5_linuxx64_expc.tar.gz
cd /home/db2/expc

## Run the installer
./db2_install

## Create the instance users
## run as root
mkdir -p /db2home
groupadd -g 607 dagadm
groupadd -g 608 dagfenc
useradd -u 607 -g dagadm -d /db2home/dainst -m dainst
useradd -u 608 -g dagfenc -d /home/dafenc -m dafenc
useradd -u 609 -g dagadm -d /db2home/dw -m dw

## Register the instance ports in the services file
vi /etc/services
#dainst
db2c_dainst     62000/tcp
DB2_dainst      62001/tcp
DB2_dainst_1    62002/tcp
DB2_dainst_2    62003/tcp
DB2_dainst_END  62004/tcp

## Create the instance
cd /opt/ibm/db2/V10.5/instance
./db2icrt -u dafenc dainst

## run as dainst
su - dainst
db2set DB2CODEPAGE=1386
db2set DB2COMM=TCPIP
db2set DB2_PARALLEL_IO=*
db2 update dbm cfg using SVCENAME db2c_dainst
db2 get dbm cfg | grep -w SVCENAME
cat /db2home/dainst/sqllib/db2nodes.cfg
db2start

## Create the DWMM database (run as the instance user)
mkdir -p /db2home/dainst/etldb
db2 "CREATE DATABASE dwmm AUTOMATIC STORAGE NO ON /db2home/dainst/etldb USING CODESET GBK TERRITORY CN RESTRICTIVE"
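With DB2COMM=TCPIP and SVCENAME in place, the instance listens on port 62000, so a remote DB2 client can catalog and reach it. A minimal sketch, run on a client machine; the node name dwmmnode and <server-ip> are placeholders:

#catalog the server node and the database, then connect
db2 catalog tcpip node dwmmnode remote <server-ip> server 62000
db2 catalog database dwmm at node dwmmnode
db2 connect to dwmm user dainst
db2 "SELECT 1 FROM sysibm.sysdummy1"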
#System pre-installation setup
#-------------------------system pre-installation setup-----------------------------
1. Firewall:
#stop the firewall
service iptables stop
#start the firewall: service iptables start
#disable the firewall at boot
chkconfig iptables off
#enable the firewall at boot: chkconfig iptables on
#check firewall status: /etc/init.d/iptables status
2. Set the node hostname
vi /etc/sysconfig/network
3. Configure the hosts file
vi /etc/hosts
#e.g.: 192.168.46.131 hadoop
4. Set the system time zone
rm -rf /etc/localtime
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
5. Passwordless SSH login
#generate a public/private key pair on each node:
cd ~/.ssh
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
#append the public key to authorized_keys
cat id_dsa.pub >> authorized_keys

##Python installation/upgrade
#-------------------------python installation/upgrade----------------------
1. Upload Python-2.7.13.tgz to /usr/local/
cd /usr/local
2. Extract the package
tar -zxvf Python-2.7.13.tgz
cd /usr/local/Python-2.7.13
3. Set the python install prefix and build
./configure --prefix=/usr/local/python-2.7/
make && make install
cd /usr/bin/
ls python*
4. Remove the old python binary
rm -rf /usr/bin/python
5. Create a symlink (the target must match the --prefix chosen above)
ln -s /usr/local/python-2.7/bin/python2 /usr/bin/python
6. Verify the python version
python -V
7. After a successful install the source tree can be removed; keep the tarball
cd /usr/local
rm -rf Python-2.7.13
#---------------------not installed successfully---------------------
#upload the setuptools package to /usr/local
cd /usr/local/
#extract the setuptools-2.0.tar.gz package
tar -zxvf setuptools-2.0.tar.gz
cd setuptools-2.0
python setup.py install
----------------------------------------------------

#JDK installation
#-------------------------jdk installation-----------------------------
1. Upload jdk-8u51-linux-x64.gz to /usr/local/
cd /usr/local
2. Extract the jdk package
tar -zxvf jdk-8u51-linux-x64.gz
#rename the directory
mv jdk1.8.0_51 jdk
3. Configure environment variables
vi /etc/profile
export JAVA_HOME=/usr/local/jdk
export JAVA_BIN=$JAVA_HOME/bin
export JAVA_LIB=$JAVA_HOME/lib
export CLASSPATH=.:$JAVA_LIB/tools.jar:$JAVA_LIB/dt.jar
#finally add all the paths to PATH
export PATH=.:$JAVA_BIN:$PATH
#reload the environment
source /etc/profile
4. Verify the JDK installation
java -version

#MySQL installation
#-----------------------mysql installation------------------------
1. Upload the MySQL packages to /usr/local
cd /usr/local
2. Remove conflicting packages
#check for an existing mysql install
rpm -qa | grep mysql
#if present, remove it
rpm -e mysql-libs-5.1.71-1.el6.x86_64 --nodeps
3. Install MySQL
rpm -ivh MySQL-client-5.6.22-1.el6.x86_64.rpm --nodeps
rpm -ivh MySQL-server-5.6.22-1.el6.x86_64.rpm --nodeps
4. Start the MySQL service
service mysql start
5. Read the generated root password and log in
cat /root/.mysql_secret
#JElTlG9gVsKAhRKS WQlcq4jUug_KK_ZY
#log in to mysql with the generated password
mysql -uroot -p<password>
#set a new password
mysql> SET PASSWORD = PASSWORD('root');
#test the new password
mysql -uroot -proot
6. Allow remote login
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
flush privileges;
exit;
7. Start MySQL at boot
chkconfig mysql on
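The Hadoop stack below depends on every piece configured above, so a quick sanity pass before continuing can save debugging later. A minimal sketch, assuming the hostname hadoop and the paths used above:

#!/bin/sh
#verify name resolution, passwordless SSH, and the runtimes installed so far
ping -c 1 hadoop           #the /etc/hosts entry resolves
ssh hadoop "echo ssh ok"   #should not prompt for a password
java -version              #JDK on PATH
python -V                  #the upgraded 2.7.13 interpreter
service mysql status       #MySQL daemon is running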
#Hadoop installation
#------------------------hadoop installation--------------------------
1. Upload the hadoop package to /usr/local
#go to /usr/local/
cd /usr/local
2. Extract the hadoop package
tar -xzvf hadoop-2.6.0.tar.gz
#rename the directory
mv hadoop-2.6.0 hadoop
3. Set the hadoop environment variables
vi /etc/profile
export HADOOP_HOME=/usr/local/hadoop
#change PATH to:
export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH
4. Configure hadoop
#4.1 edit hadoop-env.sh
#add the JAVA_HOME environment variable
cd /usr/local/hadoop/etc/hadoop
vi hadoop-env.sh
JAVA_HOME=/usr/local/jdk
#4.2 configure core-site.xml
cd /usr/local/hadoop/etc/hadoop
vi core-site.xml
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hadoop:9000</value>
        <description>change your own hostname</description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
    </property>
</configuration>
#4.3 configure hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
#4.4 configure mapred-site.xml
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>hadoop:9001</value>
        <description>change your own hostname</description>
    </property>
</configuration>
5. Fix the 64-bit native library warning
##after starting Hadoop you may see the warning:
##Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
##the bundled native library is not built for 64-bit linux
##extract the prepared 64-bit lib package into the lib directory of the installed hadoop
#note: the lib directory itself, not lib/native
##run:
#tar -x hadoop-native-64-2.4.0.tar -C hadoop/lib/
cd /usr/local
mv hadoop-native-64-2.6.0.tar hadoop/lib
cd /usr/local/hadoop/lib
tar -xvf hadoop-native-64-2.6.0.tar
#then add the following to the environment:
vi /etc/profile
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
#reload the environment
source /etc/profile
6. Format the namenode
hadoop namenode -format
7. Start hadoop
start-all.sh
8. Verify the installation:
#run jps
jps
#five java processes should be running:
DataNode
NameNode
SecondaryNameNode
JobTracker
TaskTracker
#check via browser:
HDFS: hadoop:50070
MapReduce: hadoop:50030
9. To browse from a local Windows machine, add the host entry to:
C:\Windows\System32\drivers\etc\hosts
10. Troubleshooting
If the web pages are unreachable from Windows, the NameNode process may have failed to start. Possible causes:
1. the filesystem was never formatted
2. a configuration file error
3. the hostname is not bound to the IP
4. passwordless SSH login is not configured
#formatting more than once also causes errors
Fix: delete the /usr/local/hadoop/tmp directory and format again

#---------------------hive installation----------------------------
1. Upload the hive package to /usr/local
cd /usr/local
2. Extract the hive package
tar -zxvf hive-0.9.0.tar.gz
mv hive-0.9.0 hive
3. Configure the hive environment variables
vi /etc/profile
export HIVE_HOME=/usr/local/hive
export PATH=.:$HIVE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH
source /etc/profile
4. Upload the MySQL JDBC driver to /usr/local and copy it into hive's lib directory
cd /usr/local
cp mysql-connector-java-5.1.39-bin.jar /usr/local/hive/lib/
5. Configure hive
#edit hive-env.sh and add the hadoop environment variable
cd /usr/local/hive/conf
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
export HADOOP_HOME=/usr/local/hadoop
#edit hive-log4j.properties
cd /usr/local/hive/conf
cp hive-log4j.properties.template hive-log4j.properties
vi hive-log4j.properties
#log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
#edit hive-site.xml
cd /usr/local/hive/conf
cp hive-default.xml.template hive-site.xml
vi hive-site.xml
#add the following:
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://hadoop:3306/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>username to use against metastore database</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root</value>
    <description>password to use against metastore database</description>
</property>
6. Verify the hive installation
hive
show databases;
mysql -uroot -proot
show databases;
use hive;
show tables;
7. Upload hive-builtins-0.9.0.jar to /usr/local/hive/lib/ on HDFS
cd /usr/local/hive/lib
hdfs dfs -mkdir -p /usr/local/hive/lib
hdfs dfs -put hive-builtins-0.9.0.jar /usr/local/hive/lib
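Beyond show databases, a round trip that creates a table through Hive and reads its definition back out of the MySQL metastore confirms the JDBC wiring end to end. A minimal sketch; the table name smoke_test is only an illustration:

hive -e "CREATE TABLE IF NOT EXISTS smoke_test (id INT); SHOW TABLES;"
#the new table should now appear in the metastore tables inside MySQL:
mysql -uroot -proot hive -e "SELECT TBL_NAME FROM TBLS;"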
#--------------------------DB2 installation-----------------------------------#
1. Disable address space layout randomization
vi /etc/sysctl.conf
#add:
kernel.randomize_va_space=0
sysctl -p
2. Upload the db2 package, extract it, and install
mkdir /home/db2
cd /home/db2
tar -xvf v9.7fp9_linuxx64_server.tar.gz
cd ./server
./db2_install   #start the installer; choose the ESE (enterprise) edition
3. Create the instance users, as root
mkdir -p /db2home
groupadd -g 607 dagadm
groupadd -g 608 dagfenc
useradd -u 607 -g dagadm -d /db2home/dainst -m dainst
useradd -u 608 -g dagfenc -d /home/dafenc -m dafenc
useradd -u 609 -g dagadm -d /db2home/dw -m dw
passwd dainst   #set password: dainst
passwd dafenc   #set password: dafenc
passwd dw       #set password: dw
4. Register the instance ports in the services file
vi /etc/services
db2c_dainst     62000/tcp
DB2_dainst      62001/tcp
DB2_dainst_1    62002/tcp
DB2_dainst_2    62003/tcp
DB2_dainst_END  62004/tcp
5. Create the instance
cd /opt/ibm/db2/V9.7/instance
./db2icrt -s ese -u dafenc dainst
6. Configure the database manager, as dainst
su - dainst
db2set DB2CODEPAGE=1386
db2set DB2COMM=TCPIP
db2set DB2_PARALLEL_IO=*
db2 update dbm cfg using SVCENAME db2c_dainst
db2 get dbm cfg | grep -w SVCENAME
cat /db2home/dainst/sqllib/db2nodes.cfg
db2start
7. Create the DWMM database
mkdir -p /db2home/dainst/etldb
db2 "CREATE DATABASE dwmm AUTOMATIC STORAGE NO ON /db2home/dainst/etldb USING CODESET GBK TERRITORY CN RESTRICTIVE"
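Once db2start succeeds, a short check as the instance owner confirms the database was created and is reachable. A minimal sketch, run as dainst:

db2 list db directory      #DWMM should appear in the local catalog
db2 connect to dwmm
db2 "SELECT 1 FROM sysibm.sysdummy1"
db2 terminate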
#Importing data into mysql
--full load, replacing existing rows
load data local infile "/home/python/data/stock.csv" replace into table stock CHARACTER SET gb2312 FIELDS TERMINATED BY "," LINES TERMINATED BY "\n";
--incremental append
load data local infile '/etl/etldata/input/20170216/industry_20170216.csv' into table ext_stock_industry fields terminated by "," lines terminated by '\n';

#Point the ext external table at the matching input directory
"ALTER TABLE ext_${tab_name} SET LOCATION 'hdfs://nameservice1/etldata/input/${tab_name}'"

## truncate the STG delta table ###
#(`dt` and ${TDH} are helpers defined elsewhere in the enclosing script: a timestamp
# function and the SQL client command, respectively)
echo "`dt` :start truncate table STG_${tab_name}"
${TDH} "truncate table STG_${tab_name}"
[ $? -ne 0 ] && echo "`dt` :truncate table STG_${tab_name} failed " && exit -1
echo "`dt` :truncate Table STG_${tab_name} succeed "
## load the daily delta into the STG table ###
echo "`dt` :start insert STG_${tab_name} from EXT_${tab_name}"
${TDH} "insert into STG_${tab_name} select A.*,${today} from EXT_${tab_name} A"
[ $? -ne 0 ] && echo "`dt` :insert STG_${tab_name} failed " && exit -1
echo "`dt` :insert into Table STG_${tab_name} succeed "
return 0

#Resetting the scheduling environment
1. Go to the directory holding the script that clears the etl scheduling tables
cd /home/dainst/tmp/jjm/init
2. Run rollback.sh to clear the environment
./rollback.sh

#Configuring the job flow
1. Configure the db2 ODBC data source
Run on the local Windows machine:
C:\Windows\SysWOW64\odbcad32.exe
data source name: dydwmm
user id: dainst
password: dainst
IP: 192.168.101.90
port: 61000
2. Open the excel file and submit the job flow
#open the excel file locally:
C:\Users\Idea\Desktop\数据项目\05-大数据平台\02-数据处理\ETL\大数据平台部署\06-相关文档
#delete the files listed on the right
submit the 3 sheets one by one
3. Check that the job configuration succeeded
#run the following checks in waterdrop:
SELECT * FROM etl.job_loc;
SELECT * FROM etl.job_metadata;
SELECT * FROM etl.job_seq;
#check the batch date
SELECT * FROM etl.job_biz_date;
4. Clear the history directories
#clear the directories for files distributed by the provincial head office
rm -rf /etl/etldata/input/S-999000/CCRD/ADD/*
rm -rf /etl/etldata/input/S-999000/CORE/ADD/*
rm -rf /etl/etldata/input/S-999000/FMS/ADD/*
#clear the dw database export directory
rm -rf /etl/etldata/input/DW_TO_HIVE/*
5. Start the scheduler
cd /etl/etldata/script/job-schd-engine-0.1Silver/bin
#stop the scheduler first
mv nohup.out nohup.out_tmp
#configure the scheduler start time
cd /etl/etldata/script/job-schd-engine-0.1Silver/config
vi quartz_jobs.xml
#start the scheduler
cd /etl/etldata/script/job-schd-engine-0.1Silver/bin
nohup ./start.sh &
6. Watch the scheduling page in the front end
Push the files distributed by the provincial head office:
cd /home/dainst/tmp/jjm/Note/CORE
./tools.sh 20170221 FMS
./tools.sh 20170221 IMBS
./tools.sh 20170221 CCRD
./tools.sh 20170221 CORE
Create the handshake file for the DW database:
cd /etl/etldata/input/DW_TO_HIVE/20170220
touch HANDFILE.OK
7. After the first batch has run successfully
Update the view:
drop view ETL.JOB_BIZ_DATE;
CREATE VIEW JOB_BIZ_DATE(
  CUR_DT,
  LAST_DT,
  PPN_TSTMP
) AS SELECT
  '2017-02-22' AS CUR_DT,
  '2016-02-21' AS LAST_DT,
  CURRENT TIMESTAMP AS PPN_TSTMP
FROM CORE.BGFMCINF
WHERE ETL_FLAG = 'I';
select * from ETL.JOB_BIZ_DATE;
Update the time configuration and instantiate the second batch:
cd /etl/etldata/script/job-schd-engine-0.1Silver/config
vi quartz_jobs.xml
Create the compressed files and push them to the target directories:
cd /home/dainst/tmp/jjm/Note/CORE
./tools.sh 20170221 FMS
./tools.sh 20170221 IMBS
./tools.sh 20170221 CCRD
./tools.sh 20170221 CORE
Check the front-end scheduling page: the head-office files decompress successfully and the downstream jobs run.
1. On the front-end page, rename the FILE_CHECK_CRM job to FILE_CHECK_DW
Scheduling management -> job management -> set job scene to FILE -> query
Select the FILE_CHECK_CRM job and click edit
Change the job name and the mutex group of FILE_CHECK_CRM to FILE_CHECK_DW
select * from ETL.JOB_METADATA WHERE job_tp = 'FILE';
FILE_CHECK_CRM is gone; it has become FILE_CHECK_DW
select * from ETL.JOB_log WHERE job_nm LIKE 'FILE%';
4839948  WAITING  FILE_CHECK_CRM  2017-02-21
2. Create the handshake file for the DW extraction and watch whether the schedule continues
touch /etl/etldata/input/DW_TO_HIVE/20170221/HANDFILE.OK
If the job name is changed after the batch has been instantiated, the job does not continue.
If the job name is changed before the batch is instantiated, the job continues to run, and the jobs that depend on it run as well.
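The waterdrop checks above can also be run from the server's shell with the DB2 command line, which is handy while watching a batch. A minimal sketch reusing the queries from step 3, run as a user that can reach the dwmm database:

db2 connect to dwmm user dainst
db2 "SELECT * FROM etl.job_log WHERE job_nm LIKE 'FILE%'"
db2 "SELECT * FROM etl.job_biz_date"
db2 terminate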
#Can a job that depends on a job that no longer exists still run?
CMD_CHK_COMMUNICATE_FILE depends on FILE_CHECK_CRM. Before the batch was instantiated,
FILE_CHECK_CRM was renamed to FILE_CHECK_DW on the front end.
FILE_CHECK_DW continues to run,
and CMD_CHK_COMMUNICATE_FILE continues to run as well.

#Deleting a job from the front end
Update the view:
drop view ETL.JOB_BIZ_DATE;
CREATE VIEW JOB_BIZ_DATE(
  CUR_DT,
  LAST_DT,
  PPN_TSTMP
) AS SELECT
  '2017-02-20' AS CUR_DT,
  '2016-02-19' AS LAST_DT,
  CURRENT TIMESTAMP AS PPN_TSTMP
FROM CORE.BGFMCINF
WHERE ETL_FLAG = 'I';
select * from ETL.JOB_BIZ_DATE;
Update the time configuration and instantiate the second batch:
cd /etl/etldata/script/job-schd-engine-0.1Silver/config
vi quartz_jobs.xml
Create the compressed files and push them to the target directories:
cd /home/dainst/tmp/jjm/Note/CORE
./tools.sh 20170220 FMS
./tools.sh 20170220 IMBS
./tools.sh 20170220 CCRD
./tools.sh 20170220 CORE
4. Repoint the dependency in etl.job_seq:
SELECT * FROM etl.job_seq WHERE PRE_JOB='CMD_CHK_COMMUNICATE_FILE';
update etl.job_seq set PRE_JOB='FILE_CHECK_DW' where PRE_JOB='CMD_CHK_COMMUNICATE_FILE';
SELECT * FROM etl.job_seq WHERE PRE_JOB='FILE_CHECK_DW';
-------------------------------------------
In the 233 database:
SELECT count(*) FROM S_CRM_CUST_PE_BASE_INFO;       --1829596 rows
In the 90 database:
SELECT count(*) FROM ext_s_crm_cust_pe_base_info;   --1829596 rows
SELECT count(*) FROM stg_s_crm_cust_pe_base_info;   --1829596 rows
SELECT count(*) FROM sdi_s_crm_cust_pe_base_info;   --2005970 rows
SELECT count(*) FROM his_s_crm_cust_pe_base_info;   --0 rows
Redo the job, then query the databases again.
#back up the s_crm_cust_pe_base_info tables in bdpdb
drop table if exists sdi_s_crm_cust_pe_base_info_20170224;
CREATE table sdi_s_crm_cust_pe_base_info_20170224 AS SELECT * FROM sdi_s_crm_cust_pe_base_info;
select count(*) from sdi_s_crm_cust_pe_base_info_20170224;
drop table if exists stg_s_crm_cust_pe_base_info_20170224;
CREATE table stg_s_crm_cust_pe_base_info_20170224 AS SELECT * FROM stg_s_crm_cust_pe_base_info;
select count(*) from stg_s_crm_cust_pe_base_info_20170224;
drop table if exists ext_s_crm_cust_pe_base_info_20170224;
CREATE table ext_s_crm_cust_pe_base_info_20170224 AS SELECT * FROM ext_s_crm_cust_pe_base_info;
drop table if exists his_s_crm_cust_pe_base_info_20170224;
CREATE table his_s_crm_cust_pe_base_info_20170224 AS SELECT * FROM his_s_crm_cust_pe_base_info;
select count(*) from his_s_crm_cust_pe_base_info_20170224;
#Create the new tables
DROP TABLE SDI_S_CRM_CUST_PE_BASE_INFO;
CREATE TABLE IF NOT EXISTS SDI_S_CRM_CUST_PE_BASE_INFO (
  CUST_NO VARCHAR(32) comment "CRM客户号",
  CERT_TYPE VARCHAR(8) comment "证件类型(清洗后的)",
  CERT_NO VARCHAR(20) comment "证件号(清洗后的)",
  PECUST_NAME VARCHAR(80) comment "客户名称",
  CUSTNAME_SHORT VARCHAR(40) comment "客户简称",
  CUST_NAME_EN VARCHAR(100) comment "客户名称(英文)",
  VIP_CATE VARCHAR(8) comment "贵宾类别",
  COUNTRY VARCHAR(8) comment "国家",
  CREDIT_LEVEL VARCHAR(8) comment "信用等级",
  IS_FREETAX VARCHAR(8) comment "免税标识",
  BTFLBOOL VARCHAR(8) comment "移行客户标志",
  DIMABOOL VARCHAR(8) comment "是否接收推广信息",
  SEX VARCHAR(8) comment "性别",
  NTY VARCHAR(8) comment "民族",
  MRG VARCHAR(8) comment "婚姻状况",
  BTD VARCHAR(10) comment "出生日期",
  STUDY_EXP VARCHAR(8) comment "最高学历",
  DEGREE VARCHAR(8) comment "最高学位",
  PAY_ACCT VARCHAR(32) comment "工资账号",
  ACCT_WT_BK VARCHAR(80) comment "开户银行",
  YEAR_INCOME DECIMAL(16,2) comment "年收入",
  FMY_PPL INTEGER comment "家庭人数",
  INHBT_STAT VARCHAR(8) comment "居住状况",
  CUST_LEV VARCHAR(8) comment "客户层次",
  BANK_EMP_IND VARCHAR(8) comment "本行员工标志",
  EMPLOYEE_TYP VARCHAR(8) comment "员工类别",
  BANK_STK_HOLDER_IND VARCHAR(8) comment "是否本行股东",
  BANK_PARTY_IND VARCHAR(8) comment "是否本行关系人",
  NOTES VARCHAR(200) comment "本行关系人备注",
  XDZX_TYPE VARCHAR(8) comment "客户信贷类型",
  CUST_QLY VARCHAR(8) comment "客户性质",
  PERSONAL_INSRC VARCHAR(8) comment "人身保险",
  INSURANCE_DT VARCHAR(10) comment "保险期限",
  BAD_RECORD DECIMAL(5,0) comment "不良记录笔数",
  IS_LTC_CREDIT_CUST VARCHAR(8) comment "是否潜在信贷客户",
  NATIVE VARCHAR(200) comment "籍贯",
  EFF_DATE VARCHAR(8) comment "批量日期",
  END_DATE VARCHAR(8) comment "数据失效日期",
  JOB_SEQ_ID VARCHAR(8) comment "批次号"
)
comment "对私客户基本信息"
CLUSTERED BY (CUST_NO) INTO 11 BUCKETS
STORED AS ORC
TBLPROPERTIES ("transactional"="true");

DROP TABLE STG_S_CRM_CUST_PE_BASE_INFO;
CREATE TABLE IF NOT EXISTS STG_S_CRM_CUST_PE_BASE_INFO (
  CUST_NO string comment "CRM客户号",
  CERT_TYPE string comment "证件类型(清洗后的)",
  CERT_NO string comment "证件号(清洗后的)",
  PECUST_NAME string comment "客户名称",
  CUSTNAME_SHORT string comment "客户简称",
  CUST_NAME_EN string comment "客户名称(英文)",
  VIP_CATE string comment "贵宾类别",
  COUNTRY string comment "国家",
  CREDIT_LEVEL string comment "信用等级",
  IS_FREETAX string comment "免税标识",
  BTFLBOOL string comment "移行客户标志",
  DIMABOOL string comment "是否接收推广信息",
  SEX string comment "性别",
  NTY string comment "民族",
  MRG string comment "婚姻状况",
  BTD string comment "出生日期",
  STUDY_EXP string comment "最高学历",
  DEGREE string comment "最高学位",
  PAY_ACCT string comment "工资账号",
  ACCT_WT_BK string comment "开户银行",
  YEAR_INCOME string comment "年收入",
  FMY_PPL string comment "家庭人数",
  INHBT_STAT string comment "居住状况",
  CUST_LEV string comment "客户层次",
  BANK_EMP_IND string comment "本行员工标志",
  EMPLOYEE_TYP string comment "员工类别",
  BANK_STK_HOLDER_IND string comment "是否本行股东",
  BANK_PARTY_IND string comment "是否本行关系人",
  NOTES string comment "本行关系人备注",
  XDZX_TYPE string comment "客户信贷类型",
  CUST_QLY string comment "客户性质",
  PERSONAL_INSRC string comment "人身保险",
  INSURANCE_DT string comment "保险期限",
  BAD_RECORD string comment "不良记录笔数",
  IS_LTC_CREDIT_CUST string comment "是否潜在信贷客户",
  ETL_BIZ_DT string comment "ETL插入日期",
  ETL_LOAD_DT string comment "ETL更新日期",
  NATIVE string comment "籍贯",
  ETL_EFF_DATE VARCHAR(8) comment "批量日期"
)
comment "对私客户基本信息"
CLUSTERED BY (CUST_NO) INTO 11 BUCKETS
STORED AS ORC
TBLPROPERTIES ("transactional"="true");

DROP TABLE HIS_S_CRM_CUST_PE_BASE_INFO;
CREATE TABLE IF NOT EXISTS HIS_S_CRM_CUST_PE_BASE_INFO (
  CUST_NO VARCHAR(32) comment "CRM客户号",
  CERT_TYPE VARCHAR(8) comment "证件类型(清洗后的)",
  CERT_NO VARCHAR(20) comment "证件号(清洗后的)",
  PECUST_NAME VARCHAR(80) comment "客户名称",
  CUSTNAME_SHORT VARCHAR(40) comment "客户简称",
  CUST_NAME_EN VARCHAR(100) comment "客户名称(英文)",
  VIP_CATE VARCHAR(8) comment "贵宾类别",
  COUNTRY VARCHAR(8) comment "国家",
  CREDIT_LEVEL VARCHAR(8) comment "信用等级",
  IS_FREETAX VARCHAR(8) comment "免税标识",
  BTFLBOOL VARCHAR(8) comment "移行客户标志",
  DIMABOOL VARCHAR(8) comment "是否接收推广信息",
  SEX VARCHAR(8) comment "性别",
  NTY VARCHAR(8) comment "民族",
  MRG VARCHAR(8) comment "婚姻状况",
  BTD VARCHAR(10) comment "出生日期",
  STUDY_EXP VARCHAR(8) comment "最高学历",
  DEGREE VARCHAR(8) comment "最高学位",
  PAY_ACCT VARCHAR(32) comment "工资账号",
  ACCT_WT_BK VARCHAR(80) comment "开户银行",
  YEAR_INCOME DECIMAL(16,2) comment "年收入",
  FMY_PPL INTEGER comment "家庭人数",
  INHBT_STAT VARCHAR(8) comment "居住状况",
  CUST_LEV VARCHAR(8) comment "客户层次",
  BANK_EMP_IND VARCHAR(8) comment "本行员工标志",
  EMPLOYEE_TYP VARCHAR(8) comment "员工类别",
  BANK_STK_HOLDER_IND VARCHAR(8) comment "是否本行股东",
  BANK_PARTY_IND VARCHAR(8) comment "是否本行关系人",
  NOTES VARCHAR(200) comment "本行关系人备注",
  XDZX_TYPE VARCHAR(8) comment "客户信贷类型",
  CUST_QLY VARCHAR(8) comment "客户性质",
  PERSONAL_INSRC VARCHAR(8) comment "人身保险",
  INSURANCE_DT VARCHAR(10) comment "保险期限",
  BAD_RECORD DECIMAL(5,0) comment "不良记录笔数",
  IS_LTC_CREDIT_CUST VARCHAR(8) comment "是否潜在信贷客户",
  NATIVE VARCHAR(200) comment "籍贯",
  EFF_DATE VARCHAR(8) comment "批量日期",
  END_DATE VARCHAR(8) comment "数据失效日期",
  JOB_SEQ_ID VARCHAR(8) comment "批次号",
  NEW_JOB_SEQ_ID VARCHAR(8) comment "历史批次号"
)
comment "对私客户基本信息"
CLUSTERED BY (CUST_NO) INTO 11 BUCKETS
STORED AS ORC
TBLPROPERTIES ("transactional"="true");
DROP TABLE EXT_S_CRM_CUST_PE_BASE_INFO;
CREATE EXTERNAL TABLE IF NOT EXISTS EXT_S_CRM_CUST_PE_BASE_INFO (
  CUST_NO string comment "CRM客户号",
  CERT_TYPE string comment "证件类型(清洗后的)",
  CERT_NO string comment "证件号(清洗后的)",
  PECUST_NAME string comment "客户名称",
  CUSTNAME_SHORT string comment "客户简称",
  CUST_NAME_EN string comment "客户名称(英文)",
  VIP_CATE string comment "贵宾类别",
  COUNTRY string comment "国家",
  CREDIT_LEVEL string comment "信用等级",
  IS_FREETAX string comment "免税标识",
  BTFLBOOL string comment "移行客户标志",
  DIMABOOL string comment "是否接收推广信息",
  SEX string comment "性别",
  NTY string comment "民族",
  MRG string comment "婚姻状况",
  BTD string comment "出生日期",
  STUDY_EXP string comment "最高学历",
  DEGREE string comment "最高学位",
  PAY_ACCT string comment "工资账号",
  ACCT_WT_BK string comment "开户银行",
  YEAR_INCOME string comment "年收入",
  FMY_PPL string comment "家庭人数",
  INHBT_STAT string comment "居住状况",
  CUST_LEV string comment "客户层次",
  BANK_EMP_IND string comment "本行员工标志",
  EMPLOYEE_TYP string comment "员工类别",
  BANK_STK_HOLDER_IND string comment "是否本行股东",
  BANK_PARTY_IND string comment "是否本行关系人",
  NOTES string comment "本行关系人备注",
  XDZX_TYPE string comment "客户信贷类型",
  CUST_QLY string comment "客户性质",
  PERSONAL_INSRC string comment "人身保险",
  INSURANCE_DT string comment "保险期限",
  BAD_RECORD string comment "不良记录笔数",
  IS_LTC_CREDIT_CUST string comment "是否潜在信贷客户",
  ETL_BIZ_DT string comment "ETL插入日期",
  ETL_LOAD_DT string comment "ETL更新日期",
  NATIVE string comment "籍贯"
)
comment "对私客户基本信息"
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.csv.serde.CSVSerde'
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/etldata';

Run the 20170220 batch and check the row counts.
#row counts in the new tables
SELECT count(*) FROM ext_s_crm_cust_pe_base_info;   --1829596
SELECT count(*) FROM stg_s_crm_cust_pe_base_info;   --1829696
SELECT count(*) FROM sdi_s_crm_cust_pe_base_info;   --1829596
SELECT count(*) FROM his_s_crm_cust_pe_base_info;   --0
Rerun the 20170220 batch; all jobs complete successfully. Check the counts again:
SELECT count(*) FROM ext_s_crm_cust_pe_base_info;   --1829596
SELECT count(*) FROM stg_s_crm_cust_pe_base_info;   --1829696
SELECT count(*) FROM sdi_s_crm_cust_pe_base_info;   --1829596
SELECT count(*) FROM his_s_crm_cust_pe_base_info;   --0
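Comparing the four layers one query at a time is error-prone; a single UNION ALL keeps the counts side by side. A minimal sketch; the beeline URL is a placeholder for the cluster's Inceptor/HiveServer2 address, and top-level UNION ALL is assumed to be supported by the SQL dialect:

beeline -u "jdbc:hive2://<inceptor-host>:10000/bdpdb" -e "
SELECT 'ext' AS layer, count(*) AS cnt FROM ext_s_crm_cust_pe_base_info
UNION ALL SELECT 'stg', count(*) FROM stg_s_crm_cust_pe_base_info
UNION ALL SELECT 'sdi', count(*) FROM sdi_s_crm_cust_pe_base_info
UNION ALL SELECT 'his', count(*) FROM his_s_crm_cust_pe_base_info;"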
use stock;
load data local infile '/home/crawblog/data/FILE' into table blog fields terminated by "," lines terminated by '\n';
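LOAD DATA LOCAL INFILE only works when the local_infile capability is enabled on both server and client, which is worth checking before blaming the file. A minimal sketch, assuming the root/root credentials configured during the MySQL install:

mysql -uroot -proot -e "SHOW VARIABLES LIKE 'local_infile';"
#run the client with local_infile explicitly enabled, then verify the load
mysql -uroot -proot --local-infile=1 stock -e "SELECT COUNT(*) FROM blog;"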
create or replace procedure bdpdb.sp_idv_cst_smy_bk_tl_busi()
IS
  trc_tab STRING;
BEGIN
  trc_tab := "truncate table bdpdb.IDV_CST_SMY_BK_TL_BUSI ;";
  EXECUTE IMMEDIATE trc_tab;
  insert into bdpdb.IDV_CST_SMY_BK_TL_BUSI
  select date_dt      --date
        ,branch_id    --branch id
        ,counter_no   --teller id
        ,tel_txid     --transaction source code
        ,NVL(LAG(tran_time,1) OVER(partition by date_dt,counter_no,branch_id,process_time,process_end_time
                                   order by date_dt,branch_id,counter_no,process_time,process_end_time,tran_time),
             process_time) start_time   --transaction start time
        ,tran_time    --transaction end time
  from (
    select A.date_dt           --date
          ,A.counter_no        --teller id
          ,A.branch_id         --branch id
          ,B.tel_txid          --transaction source code
          ,A.process_time      --queue-call start time
          ,B.tran_time         --transaction time
          ,A.process_end_time  --queue-call end time
    from (
      select substr(A.process_time,1,4)||substr(A.process_time,6,2)||substr(A.process_time,9,2) date_dt
            ,A.BRANCH_ID
            ,A.WIN_NO
            ,B.counter_no
            ,TDH_TODATE(TIMESTAMP(A.PROCESS_TIME) + interval '180' SECOND,'yyyy-MM-dd HH:mm:ss','yyyyMMddHHmmss') PROCESS_TIME          --queue-call time
            ,TDH_TODATE(TIMESTAMP(A.process_end_time) + interval '180' SECOND,'yyyy-MM-dd HH:mm:ss','yyyyMMddHHmmss') process_end_time  --service end time
      from bdpdb.SDI_T_CUST_QUEUE A
      left join (
        --exclude tellers that worked several windows at once
        SELECT A.date_dt, A.department BRANCH_ID, B.win_no, A.counter_no
        FROM (
          select date_dt, department, counter_no
          from bdpdb.SDI_BH_COUNTER
          GROUP BY date_dt, department, counter_no
          having count(win_no)=1
        ) A
        LEFT JOIN bdpdb.SDI_BH_COUNTER B
          ON A.date_dt=B.date_dt AND A.department=B.department AND A.counter_no=B.counter_no
        order BY 1,2,3,4
      ) B
        on substr(A.process_time,1,4)||substr(A.process_time,6,2)||substr(A.process_time,9,2)=B.date_dt
       and A.BRANCH_ID=B.BRANCH_ID
       and A.WIN_NO=B.WIN_NO
      where length(B.counter_no)=7
      order by 1,2,3,4,5,6
    ) A
    right join (
      select distinct SYS_DATE,TEL_ID,TEL_TXID,TRAN_TIME
      from (
        select SYS_DATE                     --transaction date
              ,TEL_ID                       --transaction teller id
              ,TEL_TXID                     --original transaction code entered by the teller
              ,SYS_DATE||SYS_TIME TRAN_TIME --transaction time
        from bdpdb.SDI_F_CORE_BYFTJRN
        where tx_ouno like '89120%'
          and substr(TEL_ID,1,3)='891'
          AND SYS_DATE>='20161001' and SYS_DATE<='20161122'
        order by 1,2,4,3)
    ) B
      on A.date_dt=B.sys_date
     and A.counter_no=B.tel_id
     and B.tran_time >= A.process_time
     and B.tran_time <= A.process_end_time
    where A.date_dt is not NULL
    order by 1,2,3,5,7,6,4
  );
END;
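Once the procedure compiles, it can be exercised from the shell. A minimal sketch, assuming Inceptor's PL/SQL mode accepts CALL and with the beeline JDBC URL as a placeholder:

#invoke the procedure, then check the rebuilt row count
beeline -u "jdbc:hive2://<inceptor-host>:10000/bdpdb" -e "CALL bdpdb.sp_idv_cst_smy_bk_tl_busi();"
beeline -u "jdbc:hive2://<inceptor-host>:10000/bdpdb" -e "SELECT COUNT(*) FROM bdpdb.IDV_CST_SMY_BK_TL_BUSI;"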
CREATE DATABASE IF NOT EXISTS bdpdb;
USE bdpdb;
DROP TABLE IF EXISTS bdpdb.idv_cst_smy_bk_tl_busi;
create table bdpdb.idv_cst_smy_bk_tl_busi(
  date_dt    string      comment "日期",
  branch_id  string      comment "机构号",
  counter_no string      comment "柜员号",
  tel_txid   string      comment "交易源代码",
  start_time varchar(50) comment "交易起始时间",
  tran_time  varchar(50) comment "交易结束时间"
)
CLUSTERED BY (date_dt) INTO 11 BUCKETS
STORED AS ORC
TBLPROPERTIES ("transactional"="true");
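To confirm the table landed as a bucketed, transactional ORC table, its definition can be read back. A minimal sketch; the beeline URL is a placeholder:

beeline -u "jdbc:hive2://<inceptor-host>:10000/bdpdb" -e "DESCRIBE FORMATTED bdpdb.idv_cst_smy_bk_tl_busi;"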
# /etc/profile
#java environment variables
export JAVA_HOME=/usr/local/jdk
export JAVA_BIN=$JAVA_HOME/bin
export JAVA_LIB=$JAVA_HOME/lib
export CLASSPATH=.:$JAVA_LIB/tools.jar:$JAVA_LIB/dt.jar
#hadoop environment variables
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
#hive environment variables
export HIVE_HOME=/usr/local/hive
#zookeeper environment variables
export ZOOKEEPER_HOME=/usr/local/zookeeper
#hbase environment variables
export HBASE_HOME=/usr/local/hbase
export PATH=.:$HBASE_HOME/bin:$ZOOKEEPER_HOME/bin:$HIVE_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_BIN:$PATH
#time zone
TZ='Asia/Shanghai'
export TZ
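After editing the profile, a short check confirms each variable resolved and the tools are on PATH. A minimal sketch:

source /etc/profile
echo "JAVA_HOME=$JAVA_HOME"
echo "HADOOP_HOME=$HADOOP_HOME HIVE_HOME=$HIVE_HOME"
echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME HBASE_HOME=$HBASE_HOME"
java -version    #resolves through $JAVA_BIN on PATH
hadoop version   #resolves through $HADOOP_HOME/bin
date             #should print Asia/Shanghai (CST) local time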