拆分字符串为多列
- substring_index
select
substring_index(substring_index(tp, ';', 1), ';', -1) as type1,
substring_index(substring_index(tp, ';', 2), ';', -1) as type2,
substring_index(substring_index(tp, ';', 3), ';', -1) as type3
from (
select distinct(type) as tp
from poi
) as a
字符串打开为多行
- explode
select explode(educationDegree) from(
select educationList.educationDegree
from user_resume
where allEducations is not null
)
字符串匹配
- regexp
select * from tps
where type2 regexp '.*银行.*'
日期
spark sql参考文档1
SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss');
-- 1970-01-01 00:00:00
SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');
SELECT from_utc_timestamp('2016-08-31', 'GMT+8');
-- 2016-08-31 09:00:00
SELECT hour('2009-07-30 12:58:59');
-- 12
select id
from user_resume
where resumeSource = 'renren'
and hour(from_utc_timestamp(from_unixtime(birthday/1000),'GMT+00:00')) != 0
limit 100
参考文献
-
Functions - Spark SQL, Built-in Functions. (2018). Spark.apache.org. Retrieved 25 September 2018, from https://spark.apache.org/docs/2.3.0/api/sql/index.html#from_unixtime ↩