读取 vpnlog 日志文件,计算这一天,每个小时在线用户数和各个用户的在线总时长,在线次数,最大在线时长
【1】数据 文件vpnlog 数据如下
{"userName":"a","ts":"2020-04-07T00:12:02.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T00:25:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T01:45:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T03:15:23.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T04:25:57.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T05:04:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T07:08:32.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T08:09:00.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T12:15:43.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T16:35:18.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T19:48:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T21:25:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T21:35:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T21:40:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T22:15:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T23:17:21.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T00:25:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T01:45:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T03:15:23.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T04:25:57.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T05:04:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T07:08:32.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T10:08:32.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T12:15:43.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T16:35:18.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T19:48:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T21:25:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T21:35:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T21:50:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T22:15:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T23:17:21.000Z","type":"login"}
【2】Scala代码实现
1 package com.it.baizhan.scalacode.sparksql.examples 2 3 import java.text.SimpleDateFormat 4 import java.util.Calendar 5 6 import org.apache.spark.sql.SparkSession 7 8 import scala.collection.mutable.ListBuffer 9 10 /** 11 * 需求: 12 * 读取 vpnlog 日志文件,其中userName为用户名,ts为记录时间,当type为login时,为登入时间,type为logout为登出时间 13 * 问题: 14 * 1)这一天,每个小时在线用户数 15 * 2)计算这一天,各个用户的在线总时长,在线次数,最大在线时长 16 * (如果用户一天开始是登出记录,则认为他零点登入,如果一天结束时登入日志,则认为他24点登出) 17 * 要求使用SparkSQL实现,并给出计算逻辑说明 18 * 19 * SQL函数: 20 * replace(列,字符串1,字符串2) :对某列的数据查找字符串1替换成字符串2 21 */ 22 object VpnLog { 23 def main(args: Array[String]): Unit = { 24 val session = SparkSession.builder().master("local").appName("test").getOrCreate() 25 session.sparkContext.setLogLevel("Error") 26 //读取json格式的数据 27 val df = session.read.json("./data/vpnlog") 28 df.createTempView("temp1") 29 30 //对时间进行转换 31 session.sql( 32 """ 33 | select 34 | username,replace(replace(ts,"T"," "),".000Z","") as ts,type 35 | from temp1 36 """.stripMargin).createTempView("temp2") 37 38 /** 39 * +--------+-------------------+------+ 40 * |username|ts |type | 41 * +--------+-------------------+------+ 42 * |a |2020-04-07 00:12:02|logout| 43 * |a |2020-04-07 00:25:36|login | 44 * |a |2020-04-07 01:45:36|logout| 45 * |a |2020-04-07 03:15:23|login | 46 * |a |2020-04-07 04:25:57|logout| 47 * |a |2020-04-07 05:04:36|login | 48 * |a |2020-04-07 07:08:32|logout| 49 * |a |2020-04-07 08:09:00|login | 50 * |a |2020-04-07 12:15:43|logout| 51 * |a |2020-04-07 16:35:18|login | 52 * |a |2020-04-07 19:48:36|logout| 53 * |a |2020-04-07 21:25:36|login | 54 * |a |2020-04-07 21:35:36|logout| 55 * |a |2020-04-07 21:40:36|login | 56 * |a |2020-04-07 22:15:36|logout| 57 * |a |2020-04-07 23:17:21|login | 58 * |b |2020-04-07 00:25:36|login | 59 * |b |2020-04-07 01:45:36|logout| 60 * |b |2020-04-07 03:15:23|login | 61 * |b |2020-04-07 04:25:57|logout| 62 * |b |2020-04-07 05:04:36|login | 63 * |b |2020-04-07 07:08:32|logout| 64 * |b |2020-04-07 10:08:32|login | 65 * |b |2020-04-07 12:15:43|logout| 66 * |b |2020-04-07 16:35:18|login | 67 * |b |2020-04-07 19:48:36|logout| 68 * |b |2020-04-07 21:25:36|login | 69 * |b |2020-04-07 21:35:36|logout| 70 * |b |2020-04-07 21:50:36|login | 71 * |b |2020-04-07 22:15:36|logout| 72 * |b |2020-04-07 23:17:21|login | 73 * +--------+-------------------+------+ 74 */ 75 76 //给表中的数据打标号 77 session.sql( 78 """ 79 | select 80 | username,ts,type,row_number() over(partition by username order by ts ) as rank 81 | from temp2 82 """.stripMargin).createTempView("temp3") 83 84 /** 85 * +--------+-------------------+------+----+ 86 * |username|ts |type |rank| 87 * +--------+-------------------+------+----+ 88 * |b |2020-04-07 00:25:36|login |1 | 89 * |b |2020-04-07 01:45:36|logout|2 | 90 * |b |2020-04-07 03:15:23|login |3 | 91 * |b |2020-04-07 04:25:57|logout|4 | 92 * |b |2020-04-07 05:04:36|login |5 | 93 * |b |2020-04-07 07:08:32|logout|6 | 94 * |b |2020-04-07 10:08:32|login |7 | 95 * |b |2020-04-07 12:15:43|logout|8 | 96 * |b |2020-04-07 16:35:18|login |9 | 97 * |b |2020-04-07 19:48:36|logout|10 | 98 * |b |2020-04-07 21:25:36|login |11 | 99 * |b |2020-04-07 21:35:36|logout|12 | 100 * |b |2020-04-07 21:50:36|login |13 | 101 * |b |2020-04-07 22:15:36|logout|14 | 102 * |b |2020-04-07 23:17:21|login |15 | 103 * |a |2020-04-07 00:12:02|logout|1 | 104 * |a |2020-04-07 00:25:36|login |2 | 105 * |a |2020-04-07 01:45:36|logout|3 | 106 * |a |2020-04-07 03:15:23|login |4 | 107 * |a |2020-04-07 04:25:57|logout|5 | 108 * |a |2020-04-07 05:04:36|login |6 | 109 * |a |2020-04-07 07:08:32|logout|7 | 110 * |a |2020-04-07 08:09:00|login |8 | 111 * |a |2020-04-07 12:15:43|logout|9 | 112 * |a |2020-04-07 16:35:18|login |10 | 113 * |a |2020-04-07 19:48:36|logout|11 | 114 * |a |2020-04-07 21:25:36|login |12 | 115 * |a |2020-04-07 21:35:36|logout|13 | 116 * |a |2020-04-07 21:40:36|login |14 | 117 * |a |2020-04-07 22:15:36|logout|15 | 118 * |a |2020-04-07 23:17:21|login |16 | 119 * +--------+-------------------+------+----+ 120 */ 121 122 //进行自关联,错位匹配 123 session.sql( 124 """ 125 | select a.username as username1 ,a.ts as ts1,a.type as type1,a.rank as rank1, 126 | b.username as username2 ,b.ts as ts2,b.type as type2,b.rank as rank2 127 | from temp3 a full outer join temp3 b on a.username = b.username and a.rank = b.rank-1 128 | order by a.username,a.ts 129 """.stripMargin).createTempView("temp4") 130 131 /** 132 * +---------+-------------------+------+-----+---------+-------------------+------+-----+ 133 * |username1|ts1 |type1 |rank1|username2|ts2 |type2 |rank2| 134 * +---------+-------------------+------+-----+---------+-------------------+------+-----+ 135 * |null |null |null |null |b |2020-04-07 00:25:36|login |1 | 136 * |null |null |null |null |a |2020-04-07 00:12:02|logout|1 | 137 * |a |2020-04-07 00:12:02|logout|1 |a |2020-04-07 00:25:36|login |2 | 138 * |a |2020-04-07 00:25:36|login |2 |a |2020-04-07 01:45:36|logout|3 | 139 * |a |2020-04-07 01:45:36|logout|3 |a |2020-04-07 03:15:23|login |4 | 140 * |a |2020-04-07 03:15:23|login |4 |a |2020-04-07 04:25:57|logout|5 | 141 * |a |2020-04-07 04:25:57|logout|5 |a |2020-04-07 05:04:36|login |6 | 142 * |a |2020-04-07 05:04:36|login |6 |a |2020-04-07 07:08:32|logout|7 | 143 * |a |2020-04-07 07:08:32|logout|7 |a |2020-04-07 08:09:00|login |8 | 144 * |a |2020-04-07 08:09:00|login |8 |a |2020-04-07 12:15:43|logout|9 | 145 * |a |2020-04-07 12:15:43|logout|9 |a |2020-04-07 16:35:18|login |10 | 146 * |a |2020-04-07 16:35:18|login |10 |a |2020-04-07 19:48:36|logout|11 | 147 * |a |2020-04-07 19:48:36|logout|11 |a |2020-04-07 21:25:36|login |12 | 148 * |a |2020-04-07 21:25:36|login |12 |a |2020-04-07 21:35:36|logout|13 | 149 * |a |2020-04-07 21:35:36|logout|13 |a |2020-04-07 21:40:36|login |14 | 150 * |a |2020-04-07 21:40:36|login |14 |a |2020-04-07 22:15:36|logout|15 | 151 * |a |2020-04-07 22:15:36|logout|15 |a |2020-04-07 23:17:21|login |16 | 152 * |a |2020-04-07 23:17:21|login |16 |null |null |null |null | 153 * |b |2020-04-07 00:25:36|login |1 |b |2020-04-07 01:45:36|logout|2 | 154 * |b |2020-04-07 01:45:36|logout|2 |b |2020-04-07 03:15:23|login |3 | 155 * |b |2020-04-07 03:15:23|login |3 |b |2020-04-07 04:25:57|logout|4 | 156 * |b |2020-04-07 04:25:57|logout|4 |b |2020-04-07 05:04:36|login |5 | 157 * |b |2020-04-07 05:04:36|login |5 |b |2020-04-07 07:08:32|logout|6 | 158 * |b |2020-04-07 07:08:32|logout|6 |b |2020-04-07 10:08:32|login |7 | 159 * |b |2020-04-07 10:08:32|login |7 |b |2020-04-07 12:15:43|logout|8 | 160 * |b |2020-04-07 12:15:43|logout|8 |b |2020-04-07 16:35:18|login |9 | 161 * |b |2020-04-07 16:35:18|login |9 |b |2020-04-07 19:48:36|logout|10 | 162 * |b |2020-04-07 19:48:36|logout|10 |b |2020-04-07 21:25:36|login |11 | 163 * |b |2020-04-07 21:25:36|login |11 |b |2020-04-07 21:35:36|logout|12 | 164 * |b |2020-04-07 21:35:36|logout|12 |b |2020-04-07 21:50:36|login |13 | 165 * |b |2020-04-07 21:50:36|login |13 |b |2020-04-07 22:15:36|logout|14 | 166 * |b |2020-04-07 22:15:36|logout|14 |b |2020-04-07 23:17:21|login |15 | 167 * |b |2020-04-07 23:17:21|login |15 |null |null |null |null | 168 * +---------+-------------------+------+-----+---------+-------------------+------+-----+ 169 */ 170 171 //数据填补 172 session.sql( 173 """ 174 | select 175 | username1 ,ts1,type1,username2,ts2,type2 176 | from 177 | (select 178 | case when username1 is null then username2 else username1 end username1 , 179 | case when ts1 is null then concat(split(ts2," ")[0],\' 00:00:00\') else ts1 end ts1, 180 | case when type1 is null then \'login\' else type1 end type1, 181 | case when username2 is null then username1 else username2 end username2, 182 | case when ts2 is null then concat(split(ts1," ")[0],\' 23:59:59\') else ts2 end ts2, 183 | case when type2 is null then \'logout\' else type2 end type2 184 | from temp4) t 185 | where type1 = \'login\' and type2 = \'logout\' 186 """.stripMargin).createTempView("temp5") 187 188 /** 189 * +---------+-------------------+-----+---------+-------------------+------+ 190 * |username1|ts1 |type1|username2|ts2 |type2 | 191 * +---------+-------------------+-----+---------+-------------------+------+ 192 * |a |2020-04-07 00:00:00|login|a |2020-04-07 00:12:02|logout| 193 * |a |2020-04-07 00:25:36|login|a |2020-04-07 01:45:36|logout| 194 * |a |2020-04-07 03:15:23|login|a |2020-04-07 04:25:57|logout| 195 * |a |2020-04-07 05:04:36|login|a |2020-04-07 07:08:32|logout| 196 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout| 197 * |a |2020-04-07 16:35:18|login|a |2020-04-07 19:48:36|logout| 198 * |a |2020-04-07 21:25:36|login|a |2020-04-07 21:35:36|logout| 199 * |a |2020-04-07 21:40:36|login|a |2020-04-07 22:15:36|logout| 200 * |a |2020-04-07 23:17:21|login|a |2020-04-07 23:59:59|logout| 201 * |b |2020-04-07 00:25:36|login|b |2020-04-07 01:45:36|logout| 202 * |b |2020-04-07 03:15:23|login|b |2020-04-07 04:25:57|logout| 203 * |b |2020-04-07 05:04:36|login|b |2020-04-07 07:08:32|logout| 204 * |b |2020-04-07 10:08:32|login|b |2020-04-07 12:15:43|logout| 205 * |b |2020-04-07 16:35:18|login|b |2020-04-07 19:48:36|logout| 206 * |b |2020-04-07 21:25:36|login|b |2020-04-07 21:35:36|logout| 207 * |b |2020-04-07 21:50:36|login|b |2020-04-07 22:15:36|logout| 208 * |b |2020-04-07 23:17:21|login|b |2020-04-07 23:59:59|logout| 209 * +---------+-------------------+-----+---------+-------------------+------+ 210 */ 211 212 session.udf.register("myudf",(t:String,count:Int)=>{ 213 val list = new ListBuffer[String]() 214 val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 215 val calendar: Calendar = Calendar.getInstance() 216 calendar.setTime(sdf.parse(t)) 217 for(i <- 0 to count){ 218 list.append(sdf.format(calendar.getTime)) 219 calendar.add(Calendar.HOUR,1) 220 } 221 list 222 }) 223 224 //对数据进行一对多转换 225 session.sql( 226 """ 227 | select username1 as username,ts1,type1,username2,ts2,type2,explode(myudf(ts1,hour(ts2)-hour(ts1))) as tt 228 | from temp5 229 """.stripMargin).createTempView("temp6") 230 231 /** 232 * +--------+-------------------+-----+---------+-------------------+------+-------------------+ 233 * |username|ts1 |type1|username2|ts2 |type2 |tt | 234 * +--------+-------------------+-----+---------+-------------------+------+-------------------+ 235 * |a |2020-04-07 00:00:00|login|a |2020-04-07 00:12:02|logout|2020-04-07 00:00:00| 236 * |a |2020-04-07 00:25:36|login|a |2020-04-07 01:45:36|logout|2020-04-07 00:25:36| 237 * |a |2020-04-07 00:25:36|login|a |2020-04-07 01:45:36|logout|2020-04-07 01:25:36| 238 * |a |2020-04-07 03:15:23|login|a |2020-04-07 04:25:57|logout|2020-04-07 03:15:23| 239 * |a |2020-04-07 03:15:23|login|a |2020-04-07 04:25:57|logout|2020-04-07 04:15:23| 240 * |a |2020-04-07 05:04:36|login|a |2020-04-07 07:08:32|logout|2020-04-07 05:04:36| 241 * |a |2020-04-07 05:04:36|login|a |2020-04-07 07:08:32|logout|2020-04-07 06:04:36| 242 * |a |2020-04-07 05:04:36|login|a |2020-04-07 07:08:32|logout|2020-04-07 07:04:36| 243 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout|2020-04-07 08:09:00| 244 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout|2020-04-07 09:09:00| 245 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout|2020-04-07 10:09:00| 246 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout|2020-04-07 11:09:00| 247 * |a |2020-04-07 08:09:00|login|a |2020-04-07 12:15:43|logout|2020-04-07 12:09:00| 248 * |a |2020-04-07 16:35:18|login|a |2020-04-07 19:48:36|logout|2020-04-07 16:35:18| 249 * |a |2020-04-07 16:35:18|login|a |2020-04-07 19:48:36|logout|2020-04-07 17:35:18| 250 * |a |2020-04-07 16:35:18|login|a |2020-04-07 19:48:36|logout|2020-04-07 18:35:18| 251 * |a |2020-04-07 16:35:18|login|a |2020-04-07 19:48:36|logout|2020-04-07 19:35:18| 252 * |a |2020-04-07 21:25:36|login|a |2020-04-07 21:35:36|logout|2020-04-07 21:25:36| 253 * |a |2020-04-07 21:40:36|login|a |2020-04-07 22:15:36|logout|2020-04-07 21:40:36| 254 * |a |2020-04-07 21:40:36|login|a |2020-04-07 22:15:36|logout|2020-04-07 22:40:36| 255 * |a |2020-04-07 23:17:21|login|a |2020-04-07 23:59:59|logout|2020-04-07 23:17:21| 256 * |b |2020-04-07 00:25:36|login|b |2020-04-07 01:45:36|logout|2020-04-07 00:25:36| 257 * |b |2020-04-07 00:25:36|login|b |2020-04-07 01:45:36|logout|2020-04-07 01:25:36| 258 * |b |2020-04-07 03:15:23|login|b |2020-04-07 04:25:57|logout|2020-04-07 03:15:23| 259 * |b |2020-04-07 03:15:23|login|b |2020-04-07 04:25:57|logout|2020-04-07 04:15:23| 260 * |b |2020-04-07 05:04:36|login|b |2020-04-07 07:08:32|logout|2020-04-07 05:04:36| 261 * |b |2020-04-07 05:04:36|login|b |2020-04-07 07:08:32|logout|2020-04-07 06:04:36| 262 * |b |2020-04-07 05:04:36|login|b |2020-04-07 07:08:32|logout|2020-04-07 07:04:36| 263 * |b |2020-04-07 10:08:32|login|b |2020-04-07 12:15:43|logout|2020-04-07 10:08:32| 264 * |b |2020-04-07 10:08:32|login|b |2020-04-07 12:15:43|logout|2020-04-07 11:08:32| 265 * |b |2020-04-07 10:08:32|login|b |2020-04-07 12:15:43|logout|2020-04-07 12:08:32| 266 * |b |2020-04-07 16:35:18|login|b |2020-04-07 19:48:36|logout|2020-04-07 16:35:18| 267 * |b |2020-04-07 16:35:18|login|b |2020-04-07 19:48:36|logout|2020-04-07 17:35:18| 268 * |b |2020-04-07 16:35:18|login|b |2020-04-07 19:48:36|logout|2020-04-07 18:35:18| 269 * |b |2020-04-07 16:35:18|login|b |2020-04-07 19:48:36|logout|2020-04-07 19:35:18| 270 * |b |2020-04-07 21:25:36|login|b |2020-04-07 21:35:36|logout|2020-04-07 21:25:36| 271 * |b |2020-04-07 21:50:36|login|b |2020-04-07 22:15:36|logout|2020-04-07 21:50:36| 272 * |b |2020-04-07 21:50:36|login|b |2020-04-07 22:15:36|logout|2020-04-07 22:50:36| 273 * |b |2020-04-07 23:17:21|login|b |2020-04-07 23:59:59|logout|2020-04-07 23:17:21| 274 * +--------+-------------------+-----+---------+-------------------+------+-------------------+ 275 */ 276 277 //对时间进行转换 278 session.sql( 279 """ 280 | select distinct username, from_unixtime(unix_timestamp(tt,\'yyyy-MM-dd HH:mm:ss\'),\'yyyy-MM-dd HH\') as transtime 281 | from temp6 282 | order by username,transtime 283 """.stripMargin).createTempView("temp7") 284 285 /** 286 * +--------+-------------+ 287 * |username|transtime | 288 * +--------+-------------+ 289 * |a |2020-04-07 00| 290 * |a |2020-04-07 01| 291 * |a |2020-04-07 03| 292 * |a |2020-04-07 04| 293 * |a |2020-04-07 05| 294 * |a |2020-04-07 06| 295 * |a |2020-04-07 07| 296 * |a |2020-04-07 08| 297 * |a |2020-04-07 09| 298 * |a |2020-04-07 10| 299 * |a |2020-04-07 11| 300 * |a |2020-04-07 12| 301 * |a |2020-04-07 16| 302 * |a |2020-04-07 17| 303 * |a |2020-04-07 18| 304 * |a |2020-04-07 19| 305 * |a |2020-04-07 21| 306 * |a |2020-04-07 22| 307 * |a |2020-04-07 23| 308 * |b |2020-04-07 00| 309 * |b |2020-04-07 01| 310 * |b |2020-04-07 03| 311 * |b |2020-04-07 04| 312 * |b |2020-04-07 05| 313 * |b |2020-04-07 06| 314 * |b |2020-04-07 07| 315 * |b |2020-04-07 10| 316 * |b |2020-04-07 11| 317 * |b |2020-04-07 12| 318 * |b |2020-04-07 16| 319 * |b |2020-04-07 17| 320 * |b |2020-04-07 18| 321 * |b |2020-04-07 19| 322 * |b |2020-04-07 21| 323 * |b |2020-04-07 22| 324 * |b |2020-04-07 23| 325 * +--------+-------------+ 326 */ 327 328 //统计每个小时段在线的用户数 329 session.sql( 330 """ 331 | select transtime,count(username) as usercount 332 | from temp7 333 | group by transtime 334 | order by transtime 335 """.stripMargin) 336 337 /** 338 * +-------------+---------+ 339 * | transtime|usercount| 340 * +-------------+---------+ 341 * |2020-04-07 00| 2| 342 * |2020-04-07 01| 2| 343 * |2020-04-07 03| 2| 344 * |2020-04-07 04| 2| 345 * |2020-04-07 05| 2| 346 * |2020-04-07 06| 2| 347 * |2020-04-07 07| 2| 348 * |2020-04-07 08| 1| 349 * |2020-04-07 09| 1| 350 * |2020-04-07 10| 2| 351 * |2020-04-07 11| 2| 352 * |2020-04-07 12| 2| 353 * |2020-04-07 16| 2| 354 * |2020-04-07 17| 2| 355 * |2020-04-07 18| 2| 356 * |2020-04-07 19| 2| 357 * |2020-04-07 21| 2| 358 * |2020-04-07 22| 2| 359 * |2020-04-07 23| 2| 360 * +-------------+---------+ 361 */ 362 363 //统计每次访问的时长 , 分钟为单位 364 session.sql( 365 """ 366 | select 367 | username1 as username ,ts1,ts2, 368 | cast (((hour(ts2)*60*60+minute(ts2)*60+second(ts2)*1) - (hour(ts1)*60*60+minute(ts1)*60+second(ts1)*1))/60 as int) as dur 369 | from temp5 370 """.stripMargin).createTempView("temp10") 371 372 /** 373 * +--------+-------------------+-------------------+---+ 374 * |username|ts1 |ts2 |dur| 375 * +--------+-------------------+-------------------+---+ 376 * |a |2020-04-07 00:00:00|2020-04-07 00:12:02|12 | 377 * |a |2020-04-07 00:25:36|2020-04-07 01:45:36|80 | 378 * |a |2020-04-07 03:15:23|2020-04-07 04:25:57|70 | 379 * |a |2020-04-07 05:04:36|2020-04-07 07:08:32|123| 380 * |a |2020-04-07 08:09:00|2020-04-07 12:15:43|246| 381 * |a |2020-04-07 16:35:18|2020-04-07 19:48:36|193| 382 * |a |2020-04-07 21:25:36|2020-04-07 21:35:36|10 | 383 * |a |2020-04-07 21:40:36|2020-04-07 22:15:36|35 | 384 * |a |2020-04-07 23:17:21|2020-04-07 23:59:59|42 | 385 * |b |2020-04-07 00:25:36|2020-04-07 01:45:36|80 | 386 * |b |2020-04-07 03:15:23|2020-04-07 04:25:57|70 | 387 * |b |2020-04-07 05:04:36|2020-04-07 07:08:32|123| 388 * |b |2020-04-07 10:08:32|2020-04-07 12:15:43|127| 389 * |b |2020-04-07 16:35:18|2020-04-07 19:48:36|193| 390 * |b |2020-04-07 21:25:36|2020-04-07 21:35:36|10 | 391 * |b |2020-04-07 21:50:36|2020-04-07 22:15:36|25 | 392 * |b |2020-04-07 23:17:21|2020-04-07 23:59:59|42 | 393 * +--------+-------------------+-------------------+---+ 394 */ 395 //统计各个指标 396 session.sql( 397 """ 398 | select 399 | username,sum(dur) as totaldur,count(*) as totalcount,max(dur) as maxdur 400 | from temp10 401 | group by username 402 """.stripMargin).show(100) 403 404 /** 405 * +--------+--------+----------+------+ 406 * |username|totaldur|totalcount|maxdur| 407 * +--------+--------+----------+------+ 408 * | b| 670| 8| 193| 409 * | a| 811| 9| 246| 410 * +--------+--------+----------+------+ 411 */ 412 413 } 414 415 }