Thursday, January 30, 2020

Scala : Provide column names as Seq to select statement in Scala

This post shows how to pass column names held in a Seq to a Spark DataFrame's select method in Scala.



Sample Code :

// Build a small three-column DataFrame to demonstrate the selection.
val df1: DataFrame = Seq((1, 1, "kjj"), (2, 2, "987987")).toDF("col1", "col2", "col3")
// Names of the columns to keep, held as an ordinary Seq[String].
val wantedColumns = Seq("col1", "col3")
// Turn every name into a Column and expand the Seq into varargs with `: _*`.
val projected = df1.select(wantedColumns.map(col): _*)
projected.show()

Full Code :
Project setup : sbt build using Spark 2.1.0 and Scala 2.11.12
------------------------build.sbt------------------------------

// Basic project coordinates.
name := "scala_sample"
version := "0.1"
scalaVersion := "2.11.12"

// One Spark version shared by every Spark module so the artifacts cannot drift apart.
val sparkVersion = "2.1.0"

// `%%` appends the Scala binary version derived from scalaVersion to the artifact name.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion
)
------------------------testing.scala------------------------------

import org.apache.spark.sql.{DataFrame,SparkSession}
import org.apache.spark.sql.functions._

/** Demo entry point: builds a small DataFrame and prints only the columns named in a Seq.
  *
  * Uses an explicit `main` method rather than `scala.App`: the Spark documentation warns
  * that subclasses of `scala.App` may not work correctly, because the object's fields are
  * initialised lazily through `DelayedInit`.
  */
object testing {

  /** Runs the demo on a local Spark session.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("spark-shell").getOrCreate()
    // Brings `toDF` on local Seqs into scope; must be imported from a stable SparkSession val.
    import spark.implicits._

    try {
      val df1: DataFrame = Seq((1, 1, "kjj"), (2, 2, "987987")).toDF("col1", "col2", "col3")
      val columnNames: Seq[String] = Seq("col1", "col3")
      new DF().selectCols(df1, columnNames).show()
    } finally {
      // Always release the session, even when the job above fails.
      spark.stop()
    }
  }
}


/** Small helper exposing column projection driven by a list of column names. */
class DF {

  /** Selects the named columns from a DataFrame.
    *
    * @param dataFrame the DataFrame to project
    * @param seq       names of the columns to keep, in the desired output order
    * @return a DataFrame containing only the requested columns
    */
  def selectCols(dataFrame: DataFrame, seq: Seq[String]): DataFrame = {
    // Convert each name to a Column, then pass the Seq as varargs to `select`.
    val columns = seq.map(col)
    dataFrame.select(columns: _*)
  }
}

Result:
+----+------+
|col1|  col3|
+----+------+
|   1|   kjj|
|   2|987987|
+----+------+

No comments:

Post a Comment