Cleaning Data in Java
Dennis Lee
Software Engineer
Dennis Lee
Title | Publish Date | Rating | Review Count | Price |
---|---|---|---|---|
[null] | 1/10/23 | 4.8 | 165 | $30.61 |
| | 9/13/19 | 4.8 | 2,521 | $38.00 |
Clean Code | 8/1/08 | 4.7 | 5,639 | $400.00 |
import java.time.LocalDate;
/** Example class that holds the {@code BookSales} data model. */
public class BookSalesExample {

    /**
     * One row of the book sales table.
     *
     * <p>Component order differs from the table's column order:
     * {@code reviewCount} comes before {@code rating}.
     *
     * @param title       book title
     * @param publishDate publication date
     * @param reviewCount review count
     * @param rating      rating
     * @param price       price
     */
    private record BookSales(
            String title,
            LocalDate publishDate,
            int reviewCount,
            double rating,
            double price) {
    }
}
book.rating() // book is an instance of BookSales
Title | Publish Date | Rating | Review Count | Price |
---|---|---|---|---|
Python Crash Course | 1/10/23 | 4.8 | 165 | $30.61 |
The Pragmatic Programmer | 9/13/19 | 4.8 | 2,521 | $38.00 |
Clean Code | 8/1/08 | 4.7 | 5,639 | $40.00 |
import java.time.LocalDate;
import java.util.Arrays;
import java.util.List;
// Create books with title, publishDate, reviewCount, rating, and price.
// Argument order follows the BookSales record components, so reviewCount
// comes before rating (the reverse of the table's column order).
List<BookSales> books = Arrays.asList(
    new BookSales("Python Crash Course", LocalDate.of(2023, 1, 10),
        165, 4.8, 30.61),
    // Rating is 4.8, matching the table row for this title
    new BookSales("The Pragmatic Programmer", LocalDate.of(2019, 9, 13),
        2521, 4.8, 38.00),
    new BookSales("Clean Code", LocalDate.of(2008, 8, 1),
        5639, 4.7, 40.00));
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
public class BookSalesExample { public static void main(String[] args) { // Create stats calculator DescriptiveStatistics stats = new DescriptiveStatistics(); // Add each book's price to stats books.forEach(book -> stats.addValue(book.getPrice())); } }
System.out.printf("Books analyzed: %d%n", books.size());
Books analyzed: 3
System.out.printf("Average price: $%.2f%n", stats.getMean());
Average price: $36.20
System.out.printf("Price range: $%.2f - $%.2f%n", stats.getMin(), stats.getMax());
Price range: $30.61 - $40.00
/** Prints the median price and the 25th-75th percentile price range. */
public class BookSalesExample {

    /**
     * Prints percentile-based price statistics.
     *
     * <p>NOTE(review): {@code stats} is not declared in this snippet; it is
     * presumably the {@code DescriptiveStatistics} instance populated in the
     * earlier snippet.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // 50th percentile = median
        System.out.printf("Median price: $%.2f%n", stats.getPercentile(50));

        // 25th to 75th percentile, reported as the "normal" price range
        System.out.printf("Normal range: $%.2f - $%.2f%n",
                stats.getPercentile(25), stats.getPercentile(75));
    }
}
Median price: $38.00
Normal range: $30.61 - $40.00
DescriptiveStatistics methods
.getMean(): average price
.getMin(), .getMax(): price range
.getPercentile(): typical prices
Cleaning Data in Java