7.7.5 普通 WordCount 案例
package chapter07
/** Basic word count demo.
  *
  * Splits a handful of hard-coded sentences into words, counts how often each
  * word occurs and prints the three most frequent words. Every intermediate
  * result is printed as well, separated by a dashed rule, so each step of the
  * pipeline can be followed on the console.
  */
object TestWordCount__简单版 {

  /** Runs the word-count pipeline on the sample sentences and prints each stage.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Rule printed between the stages of the pipeline.
    val separator = "------------------------------------------------------------------------------------------"

    val stringList = List("Hello Scala Hbase kafka", "Hello Scala Hbase", " Hello Scala", "Hello")

    // Split on runs of whitespace and drop empty tokens: a plain split(" ") turns
    // the leading blank of " Hello Scala" into an empty-string "word".
    val wordList: List[String] = stringList.flatMap(_.split("\\s+").filter(_.nonEmpty))
    println(wordList)
    println(separator)

    // Group identical words together: word -> all of its occurrences.
    val wordSame: Map[String, List[String]] = wordList.groupBy(identity)
    println(wordSame)
    println(separator)

    // The size of each group is the number of times the word occurs.
    val wordToCount: Map[String, Int] =
      wordSame.map { case (word, occurrences) => (word, occurrences.size) }
    println(wordToCount)
    println(separator)

    // Order by count, most frequent first.
    val sortList: List[(String, Int)] = wordToCount.toList.sortWith(_._2 > _._2)
    println(sortList)
    println(separator)

    // Keep only the three most frequent words.
    val resThreeList: List[(String, Int)] = sortList.take(3)
    println(resThreeList)
  }
}
7.7.6 复杂 WordCount 案例
TestWordCount__复杂版__方式01
package chapter07
/** Weighted word count, approach 1: expand each sentence by its weight.
  *
  * The input is a list of (sentence, weight) pairs. Each sentence is repeated
  * `weight` times, after which the problem is an ordinary word count. The three
  * most frequent words are printed.
  */
object TestWordCount__复杂版__方式01 {

  /** Runs the weighted word count on the sample data and prints the top three words.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val tupleList = List(("Hello Scala Spark World ", 4), ("Hello Scala Spark", 3), (" Hello Scala", 2), ("Hello", 1))

    // Repeat every sentence `times` times; the appended blank keeps the last word
    // of one copy from fusing with the first word of the next copy.
    val stringList: List[String] = tupleList.map { case (sentence, times) => (sentence + " ") * times }

    // Split on runs of whitespace and drop empty tokens: the repetition above
    // produces double blanks (and some sentences start with a blank), which a
    // plain split(" ") would turn into empty-string "words".
    val words: List[String] = stringList.flatMap(_.split("\\s+").filter(_.nonEmpty))

    // word -> all of its occurrences
    val groupMap: Map[String, List[String]] = words.groupBy(identity)

    // word -> number of occurrences
    val wordToCount: Map[String, Int] =
      groupMap.map { case (word, occurrences) => (word, occurrences.size) }

    // Most frequent first, keep the top three.
    val wordCountList: List[(String, Int)] = wordToCount.toList.sortWith(_._2 > _._2).take(3)

    println(wordCountList)
  }
}
TestWordCount__复杂版__方式02
package chapter07
/** Weighted word count, approach 2: carry the weight along with every word.
  *
  * The input is a list of (sentence, weight) pairs. Instead of repeating the
  * sentence text, every word is paired with the weight of the sentence it came
  * from; the weights are then grouped per word and summed.
  */
object TestWordCount__复杂版__方式02 {

  /** Runs the weighted word count on the sample data.
    *
    * Prints the per-word list of weights first, then the per-word total.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val tuples = List(("Hello Scala Hbase kafka", 4), ("Hello Scala Hbase", 3), (" Hello Scala", 2), ("Hello", 1))

    // Pair every word with the weight of its sentence. Splitting on runs of
    // whitespace and dropping empty tokens keeps the leading blank of
    // " Hello Scala" from being counted as an empty-string "word".
    val wordToCountList: List[(String, Int)] = tuples.flatMap { case (sentence, count) =>
      sentence.split("\\s+").filter(_.nonEmpty).map(word => (word, count))
    }

    // word -> every (word, weight) pair that mentions it
    val wordToTupleMap: Map[String, List[(String, Int)]] = wordToCountList.groupBy(_._1)

    // word -> just the weights
    val wordToCountMap: Map[String, List[Int]] =
      wordToTupleMap.map { case (word, pairs) => (word, pairs.map(_._2)) }

    // word -> total weight. This must be derived from wordToCountMap; defining it
    // in terms of itself is an illegal forward reference and sums nothing.
    val wordToTotalCountMap: Map[String, Int] =
      wordToCountMap.map { case (word, counts) => (word, counts.sum) }

    println(wordToCountMap)
    println(wordToTotalCountMap)
  }
}