package my.bigdata.scala08

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.io.Source

/**
 * Word-count exercises: several ways of counting the words of a text file.
 *
 * Every variant reads the file at `path` (default `myfile.txt`), treats runs of
 * whitespace as separators and prints the resulting word -> count mapping to
 * standard output.
 *
 * Created by lq on 2017/8/7.
 */
object Task2 {

  /** File read by every variant when the caller does not pass a path. */
  private val DefaultInput = "myfile.txt"

  /**
   * Splits a line on runs of whitespace.
   *
   * `String.split` yields an empty first element for a line that starts with
   * whitespace (and a single empty element for a blank line); those empty
   * tokens are dropped so they are not counted as a word.
   */
  private def tokens(line: String): List[String] =
    line.split("\\s+").filter(_.nonEmpty).toList

  /**
   * Opens `path`, passes its line iterator to `body` and closes the file
   * afterwards, even when `body` throws.
   *
   * The iterator is only valid while the file is open, so `body` must consume
   * it completely (fold, foreach, toList, ...) before returning.
   */
  private def withLines[A](path: String)(body: Iterator[String] => A): A = {
    val source = Source.fromFile(path)
    try body(source.getLines())
    finally source.close()
  }

  /**
   * Basic variant: a loop plus an external variable and `Map.getOrElse`.
   *
   * Reads the tokens with a `java.util.Scanner`, then counts them into an
   * immutable map held in a `var`. Prints one `(word,runningCount)` line per
   * token and finally the complete map.
   *
   * @param path file to read
   */
  def scalaWC0(path: String = DefaultInput): Unit = {
    val in = new java.util.Scanner(new java.io.File(path))
    val words = new ArrayBuffer[String]
    try {
      // Scanner.next() already returns whitespace-delimited tokens.
      while (in.hasNext()) {
        words += in.next()
      }
    } finally {
      in.close()
    }

    var map = Map[String, Int]()
    for (key <- words) {
      val count = map.getOrElse(key, 0) + 1
      map = map.updated(key, count)
      // Trace the count that was actually stored for this word.
      println((key, count))
    }
    print(map)
  }

  /**
   * Basic variant: nested loops plus an external mutable map and `getOrElse`.
   *
   * Prints one `(word,count)` tuple per distinct word.
   *
   * @param path file to read
   */
  def scalaWC1(path: String = DefaultInput): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines(path) { lines =>
      for (line <- lines; word <- tokens(line)) {
        resMap.put(word, resMap.getOrElse(word, 0L) + 1L)
      }
    }
    resMap.foreach(println(_))
  }

  /**
   * Traversal plus an external mutable map.
   *
   * The traversal uses `foreach`: `map` on an `Iterator` is lazy, so a counting
   * side effect placed inside `map` would never run unless the result were
   * consumed.
   *
   * @param path file to read
   */
  def scalaWC5(path: String = DefaultInput): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines(path) { lines =>
      lines.flatMap(tokens).foreach { word =>
        resMap += ((word, resMap.getOrElse(word, 0L) + 1L))
      }
    }
    println(resMap)
  }

  /**
   * `foldLeft` seeded with an external mutable map.
   *
   * The accumulator passed through the fold is the very map declared outside
   * it, so the external map holds the counts once the fold finishes.
   *
   * @param path file to read
   */
  def scalaWC4(path: String = DefaultInput): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines(path) { lines =>
      lines.flatMap(tokens).foldLeft(resMap) { (acc, word) =>
        acc += ((word, acc.getOrElse(word, 0L) + 1L))
      }
    }
    println(resMap)
  }

  /**
   * Advanced variant without an external variable.
   *
   * `foldLeft` receives an empty map as the initial value and a combining
   * function that updates the map with each word and returns it; the fold as a
   * whole returns the finished map.
   *
   * @param path file to read
   */
  def scalaWC41(path: String = DefaultInput): Unit = {
    val res = withLines(path) { lines =>
      lines.flatMap(tokens).foldLeft(mutable.Map[String, Long]()) { (m, word) =>
        m += ((word, m.getOrElse(word, 0L) + 1L))
      }
    }
    println(res)
  }

  /**
   * Advanced variant without an external variable: pair every word with 1,
   * group the pairs by word and sum each group with `foldLeft`.
   *
   * @param path file to read
   */
  def scalaWC2(path: String = DefaultInput): Unit = {
    val res = withLines(path) { lines =>
      lines.flatMap(tokens).toList // materialise while the file is still open
        .map((_, 1))
        .groupBy(_._1)
        .map { case (word, pairs) =>
          (word, pairs.foldLeft(0)((sum, pair) => sum + pair._2))
        }
    }
    println(res)
  }

  /**
   * Advanced variant without an external variable: pair every word with 1,
   * group the pairs by word and merge each group with `reduceLeft`.
   *
   * `reduceLeft` is safe here because `groupBy` never produces an empty group.
   *
   * @param path file to read
   */
  def scalaWC3(path: String = DefaultInput): Unit = {
    val res = withLines(path) { lines =>
      lines.flatMap(tokens).toList // materialise while the file is still open
        .map((_, 1))
        .groupBy(_._1)
        .map { case (_, pairs) =>
          pairs.reduceLeft((a, b) => (a._1, a._2 + b._2))
        }
    }
    println(res)
  }

  /**
   * Entry point: runs the `foldLeft`-based variant on the default input file.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    scalaWC41()
  }

  /*
   Summary: ways to implement word count
   1. A loop + an external map variable + the map's getOrElse behaviour.
   2. Collection methods only: chain transformations until the result appears.
   3. Collections offer several traversing methods (map, filter, foreach,
      reduceLeft, foldLeft); any of them combined with an external map and
      getOrElse can implement word count. Lazy ones such as Iterator.map must
      be consumed before their side effects happen.
   */
}