Simple example
import org.apache.spark.sql.SparkSession
import org.junit.Test

case class Person(id: Int, name: String, sex: String, age: Int)

class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()

  import spark.implicits._

  @Test
  def createData(): Unit = {
    val list = List(
      Person(1, "zhangsan", "man", 10),
      Person(2, "zhang2", "woman", 66),
      Person(3, "zhang3", "man", 70),
      Person(4, "zhang4", "man", 22))
    val df = list.toDF()
    // TODO 1. Register the DataFrame/Dataset as a temporary view
    df.createOrReplaceTempView("person")
    spark.sql(
      """
        |select
        |  name, age
        |from person
        |where age >= 30
        |""".stripMargin).show()
  }
}
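Since the elements are case class instances, the same list could also be turned into a strongly typed Dataset[Person] with toDS() instead of toDF(). A minimal sketch, reusing the Person case class, the spark session and implicits, and the list value from createData above (the filter mirrors the SQL where clause):

val ds = list.toDS()        // Dataset[Person], schema inferred from the case class
ds.filter(_.age >= 30)      // compile-checked lambda instead of a SQL string
  .select($"name", $"age")
  .show()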
A small example
- Count the number of occurrences of each word
Data source (wc.txt):
hello java
spark hadoop flume kafka
hbase kafka flume hadoop
Lateral View explode(split(value, " ")) turns a column into rows (one output row per exploded element), as the query below shows.
import org.apache.spark.sql.SparkSession
import org.junit.Test

class DataSetCreate {

  val spark = SparkSession
    .builder()
    .appName("test")
    .master("local[4]")
    .getOrCreate()

  import spark.implicits._

  @Test
  def createData(): Unit = {
    // read.textFile returns a Dataset[String] with a single column named "value"
    val ds = spark.read.textFile("src/main/resources/wc.txt")
    ds.createOrReplaceTempView("wordCount")
    // Note: no trailing semicolon inside spark.sql (it can trigger a ParseException)
    spark.sql(
      """
        |select
        |  wc,
        |  count(1) as cnt
        |from wordCount lateral view explode(split(value, " ")) as wc
        |group by wc
        |""".stripMargin).show()
  }
}
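For comparison, a minimal sketch of the same word count written against the typed Dataset API instead of SQL, assuming the same wc.txt path and the spark session/implicits from the class above:

// flatMap over Dataset[String] does the column-to-rows step, like explode(split(...))
val words = spark.read.textFile("src/main/resources/wc.txt")
  .flatMap(_.split(" "))
// the single column of a Dataset[String] is named "value"
words.groupBy($"value").count().show()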