Joins

Description

Joins
Nilesh Patel
Note by Nilesh Patel, updated more than 1 year ago
Nilesh Patel
Created by Nilesh Patel over 6 years ago
10
0

Resource summary

Page 1

// JOINS =====================================   Using RDD   //emp table   id,name,age 1,Vinayak, 35 2,Nilesh, 37 3,Raju, 30 4,Karthik, 28 5,Shreshta,1 6,Siddhish, 2     val emp = sc.textFile("C:/Users/Redirection/pateln7/Downloads/SPARK/emp.txt") val first = emp.first val empfile = emp.filter(x=>x!=first)   val empmap=empfile.map(x=>{   val splitted=x.split(",")   val id=splitted(0).trim.toInt   val name=splitted(1).trim   val age=splitted(2).trim.toInt   (id,(name,age)) })   empmap.foreach(println)   val output = empmap.filter(x=>x._2._2>30)     //payroll table id,dept,sal 1,dev, 3500 2,dev, 3700 3,qa, 3000 4,qa, 2800 5,prod,1000 6,prod, 2000     val payroll = sc.textFile("C:/Users/Redirection/pateln7/Downloads/SPARK/payroll.txt") val first = payroll.first val payrollfile = payroll.filter(x=>x!=first)   val payrollmap=payrollfile.map(x=>{   val splitted=x.split(",")   val id=splitted(0).trim.toInt   val dept=splitted(1).trim   val sal=splitted(2).trim.toInt   (id,(dept,sal)) })   payrollmap.foreach(println)   val output = payrollmap.filter(x=>x._2._2>2000) (1,(dev,3500)) (2,(dev,3700)) (4,(qa,2800)) (3,(qa,3000))   val joined=empmap.join(payrollmap)   joined.foreach(println)   (1,((Vinayak,35),(dev,3500))) (3,((Raju,30),(qa,3000))) (5,((Shreshta,1),(prod,1000))) (4,((Karthik,28),(qa,2800))) (6,((Siddhish,2),(prod,2000))) (2,((Nilesh,37),(dev,3700)))     age > 30 and sal > 2000   val output=joined.filter(x=>x._2._1._2>30 && x._2._2._2>2000)   Using Dataframe      id,name,age 1,Vinayak, 35 2,Nilesh, 37 3,Raju, 30 4,Karthik, 28 5,Shreshta,1 6,Siddhish, 2     val emp = sc.textFile("C:/Users/Redirection/pateln7/Downloads/SPARK/emp.txt") val first = emp.first val empfile = emp.filter(x=>x!=first)   val empmap=empfile.map(x=>{   val splitted=x.split(",")   val id=splitted(0).trim.toInt   val name=splitted(1).trim   val age=splitted(2).trim.toInt   (id,name,age) })   empmap.foreach(println) val empdf = empmap.toDF("id","name","age")     //payroll table id,dept,sal 1,dev, 3500 2,dev, 3700 3,qa, 3000 4,qa, 2800 5,prod,1000 6,prod, 2000     val payroll = sc.textFile("C:/Users/Redirection/pateln7/Downloads/SPARK/payroll.txt") val first = payroll.first val payrollfile = payroll.filter(x=>x!=first)   val payrollmap=payrollfile.map(x=>{   val splitted=x.split(",")   val id=splitted(0).trim.toInt   val dept=splitted(1).trim   val sal=splitted(2).trim.toInt   (id,dept,sal) })   payrollmap.foreach(println) val payrolldf = payrollmap.toDF("id","dept","sal")   scala> val dfjoined = empdf.join(payrolldf, empdf("id")===payrolldf("id")) dfjoined: org.apache.spark.sql.DataFrame = [id: int, name: string ... 4 more fields]   scala> dfjoined.show +---+--------+---+---+----+----+ | id|    name|age| id|dept| sal| +---+--------+---+---+----+----+ |  1| Vinayak| 35|  1| dev|3500| |  6|Siddhish|  2|  6|prod|2000| |  3|    Raju| 30|  3|  qa|3000| |  5|Shreshta|  1|  5|prod|1000| |  4| Karthik| 28|  4|  qa|2800| |  2|  Nilesh| 37|  2| dev|3700| +---+--------+---+---+----+----+   Using sqlcontext    val dfjoined = empdf.join(payrolldf, empdf("id")===payrolldf("id"))  dfjoined.registerTempTable("joinedemp")  val sqlContext = dfjoined.sqlContext  val output = sqlContext.sql("select * from joinedemp where age>30 and sal> 3500").show   //

Show full summary Hide full summary

Similar

WordCount
Nilesh Patel
Filter and Map
Nilesh Patel
Setup spark scala in windows
Nilesh Patel
Outer Joins
souravi sinha
10 Mind Mapping Strategies for Teachers
Andrea Leyden
Symbols in Lord of the Flies
lowri_luxton
TOEFL Practice
aliking
AS Psychology Unit 1 - Memory
Asterisked
Unit 3 Business Studies
Lauren Thrower
2PR101 1.test - 4. část
Nikola Truong