So, by just creating a quick RDD in the format of the CSV file you describe:

val list = sc.parallelize(List(("1","Timothy","04/02/2015","100","TV"), ("1","Timothy","04/03/2015","10","Book"), ("1","Timothy","04/03/2015","20","Book"), ("1","Timothy","04/05/2015","10","Book"),("2","Ursula","04/02/2015","100","TV")))

And then running:

import java.time.LocalDate
import java.time.format.DateTimeFormatter
val startDate = LocalDate.of(2015,1,4)
val endDate = LocalDate.of(2015,4,5)
val result = list
  .filter{case(_,_,date,_,_) => {
    val localDate = LocalDate.parse(date, DateTimeFormatter.ofPattern("MM/dd/yyyy"))
    localDate.isAfter(startDate) && localDate.isBefore(endDate)}}
  .map{case(id, _, _, … Read more