7.1.1 不可变集合继承图
7.1.2 可变集合继承图
7.2.3 不可变数组与可变数组的转换
7.7.6 复杂 WordCount 案例
1)方式一
/**
 * Word count, approach 1 (not general-purpose).
 *
 * The input is a list of `(sentence, times)` pairs. Each sentence is expanded
 * into `times` textual copies, the text is split into words, and the words are
 * counted by grouping. Physically repeating the text is only reasonable for
 * small counts, which is why this approach is labelled "not general-purpose".
 */
object TestWordCount {

  /**
   * Prints the three most frequent words as a `List[(String, Int)]`,
   * ordered by descending count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Pre-counted sentences: (text, number of occurrences).
    val tupleList = List(
      ("Hello Scala Spark World ", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    // Repeat every sentence `times` times; the appended space keeps the
    // last word of one copy from fusing with the first word of the next.
    val stringList: List[String] = tupleList.map(t => (t._1 + " ") * t._2)

    // Split on runs of whitespace and drop empty tokens. The first sentence
    // already ends with a space, so after appending another one a plain
    // split(" ") would produce empty-string "words" that get counted.
    val words: List[String] = stringList.flatMap(_.split("\\s+")).filter(_.nonEmpty)

    // When the function simply returns its argument, do not abbreviate it
    // with `_`: `words.groupBy(_)` expands to `x => words.groupBy(x)`,
    // not to `words.groupBy(x => x)`.
    val groupMap: Map[String, List[String]] = words.groupBy(word => word)

    // (word, occurrences) => (word, count)
    val wordToCount: Map[String, Int] = groupMap.map(t => (t._1, t._2.size))

    // Highest count first, keep the top three.
    val wordCountList: List[(String, Int)] =
      wordToCount.toList.sortWith((left, right) => left._2 > right._2).take(3)

    // The counting steps above, written as a single chain:
    // tupleList.map(t => (t._1 + " ") * t._2).flatMap(_.split("\\s+")).filter(_.nonEmpty)
    //   .groupBy(word => word).map(t => (t._1, t._2.size))

    println(wordCountList)
  }
}
2)方式二
/**
 * Word count, approach 2.
 *
 * The input is a list of `(sentence, times)` pairs. Instead of repeating the
 * text, every word inherits the count of the sentence it came from, and the
 * counts are summed per word.
 */
object TestWordCount {

  /**
   * Prints a `Map[String, Int]` from each word to its total count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Pre-counted sentences: (text, number of occurrences).
    val tuples = List(
      ("Hello Scala Spark World", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    // Pair every word with the count of its sentence:
    // (Hello,4),(Scala,4),(Spark,4),(World,4)
    // (Hello,3),(Scala,3),(Spark,3)
    // (Hello,2),(Scala,2)
    // (Hello,1)
    // Splitting on whitespace runs and dropping empty tokens keeps stray
    // leading, trailing or doubled spaces from producing an empty "word".
    val wordToCountList: List[(String, Int)] = tuples.flatMap { t =>
      t._1.split("\\s+").filter(_.nonEmpty).map(word => (word, t._2)).toList
    }

    // Group the pairs by word:
    // Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
    // Scala -> List((Scala,4), (Scala,3), (Scala,2))
    // Spark -> List((Spark,4), (Spark,3))
    // World -> List((World,4))
    val wordToTupleMap: Map[String, List[(String, Int)]] =
      wordToCountList.groupBy(t => t._1)

    // Keep only the counts for each word. A plain `map` is used rather than
    // `mapValues`, which is deprecated on Scala 2.13 and returns a lazy view
    // there instead of a Map.
    val wordToCountMap: Map[String, List[Int]] =
      wordToTupleMap.map(t => (t._1, t._2.map(pair => pair._2)))

    // Sum the per-sentence counts to get each word's total.
    val wordToTotalCountMap: Map[String, Int] =
      wordToCountMap.map(t => (t._1, t._2.sum))

    println(wordToTotalCountMap)
  }
}