"""Draw boxplots of soybean yield from the agriculture crop-yield dataset.

Two figures are shown in turn: a single boxplot of yield for the selected
crop, and boxplots of the same yield grouped by the ``Fertilizer_Used``
column.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Data Source: https://www.kaggle.com/datasets/samuelotiattakorah/agriculture-crop-yield?resource=download
DATA_PATH = "crop_yield.csv"
CROP = "Soybean"
CROP_COLUMN = "Crop"
YIELD_COLUMN = "Yield_tons_per_hectare"
GROUP_COLUMN = "Fertilizer_Used"
BOX_COLOR = "#D44803"


def load_crop_data(path=DATA_PATH, crop=CROP):
    """Read the CSV at *path* and keep only the rows for *crop*.

    Args:
        path: Location of the crop-yield CSV file.
        crop: Value of the ``Crop`` column to keep.

    Returns:
        A DataFrame containing only the rows whose ``Crop`` equals *crop*.

    Raises:
        ValueError: If no row matches *crop*, so that a wrong crop name
            fails loudly instead of being passed on to the plotting calls.
    """
    df = pd.read_csv(path)
    crop_df = df[df[CROP_COLUMN] == crop]
    if crop_df.empty:
        raise ValueError(f"No rows with {CROP_COLUMN} == {crop!r} in {path!r}")
    return crop_df


def _apply_clean_theme():
    """Apply the shared clean theme (despine, no gridlines) to the current axes."""
    sns.despine()
    plt.grid(False)


def plot_single_boxplot(df, crop=CROP):
    """Show one boxplot of the yield column for the whole of *df*.

    Args:
        df: Data already filtered to a single crop.
        crop: Crop name used in the figure title.
    """
    plt.figure(figsize=(6, 6))
    sns.boxplot(y=YIELD_COLUMN, data=df, color=BOX_COLOR, width=0.3)
    plt.ylabel("Yield", fontsize=14)
    plt.xlabel("")
    plt.title(
        f"Boxplots with PikBioStat: {crop} Yield",
        fontsize=16,
        weight="bold",
    )

    # Clean theme (no gridlines, bigger y labels)
    _apply_clean_theme()
    plt.yticks(fontsize=14)
    # A single box has no meaningful x position, so drop the x ticks.
    plt.xticks([])
    plt.show()


def plot_grouped_boxplot(df, crop=CROP):
    """Show boxplots of the yield column, one box per ``Fertilizer_Used`` value.

    Args:
        df: Data already filtered to a single crop.
        crop: Crop name used in the figure title.
    """
    plt.figure(figsize=(8, 6))
    sns.boxplot(
        x=GROUP_COLUMN,
        y=YIELD_COLUMN,
        data=df,
        color=BOX_COLOR,
        width=0.5,
    )
    plt.ylabel("Yield (tons/ha)", fontsize=14, weight="bold")
    plt.xlabel("Fertilizer Used", fontsize=14, weight="bold")
    plt.title(
        f"Boxplots with PikBioStat: {crop} Yield by Fertilizer Used",
        fontsize=16,
        weight="bold",
    )

    # Clean theme
    _apply_clean_theme()
    plt.xticks(rotation=45, ha="right", fontsize=12)
    plt.yticks(fontsize=14)
    plt.show()


def main():
    """Load the soybean rows and display both figures in order."""
    df = load_crop_data()
    # --- Single Boxplot ---
    plot_single_boxplot(df)
    # --- Group Comparisons (by Fertilizer_Used) ---
    plot_grouped_boxplot(df)


if __name__ == "__main__":
    main()