6 Rapid analysis of ecophysiological data

Code

# Pkg -----

#package
library(ggplot2)
library(plotly)
library(readxl)
library(tidyverse)
library(car) 
library(Rmisc) 
library(multcomp) 
library(lubridate)
library(dplyr)
library(scales)
library(hms)
library(tictoc)
library(multcompView)
library(gridExtra)
library(kableExtra)
library(cowplot)
library(kableExtra)
library(ggpubr)
library(labelled)
library(gtsummary)
library(ggstats)
library(performance)
library(inspectdf)
library(patchwork)
library(gridExtra)
library(ggpubr)
library(foreach)
library(parallel)
library(MASS)
library(rstatix)

#function
source(here::here("src/function/stat_function/stat_analysis_main.R")) # for make plot 
source(here::here("src/function/graph_function.R"))
set.seed(1)

# cosmetics
climate_pallet=read_excel(here::here("data/color_palette.xlsm")) %>%
      filter(set == "climat_condition") %>%
      dplyr::select(color, treatment) %>%
      pull(color) %>%
      setNames(read_excel(here::here("data/color_palette.xlsm")) %>%
                 filter(set == "climat_condition") %>%
                 pull(treatment)
               )

6.1 For data related to biomass

6.1.1 Data importation

Code

df_physio<-read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>% 
  dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte", "paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% 
#   dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte", "paire","Hydric_potential","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass", "Trmmol","Tleaf", "Ci","PARabs","CO2S",  "PARo", 
# "H2OS",  "AHs.Cs","fda",   "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow",  "Cond",  "Tair", "SVTleaf","Press", "CO2R",  "RH_S",  "Trans", "Tair_K","h2o_i", "VpdL",  "PARi",  "CndTotal","Ci_Pa", "PhiCO2","TBlk",  "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R",  "VpdA") %>% 
  drop_na(condition,recolte) %>% 
  mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS")))

6.1.2 Fast data visualization

Code

x <- inspect_cat(df_physio)
x %>%  show_plot()

Code

#inspect_cor() returns a tibble containing Pearson’s correlation coefficient, confidence intervals and p-values for pairs of numeric columns. The function combines the functionality of cor() and cor.test() in a more convenient wrapper.

#To plot the comparison of the top 20 correlation coefficients for each condition (not show):
# px<-inspect_cor(df_physio %>%
# #                  filter(condition==i) %>%
#                   filter(recolte==2)# %>% 
#                   #dplyr::select(starts_with("weight"))
#                 ) %>% 
#   slice(1:40) %>% show_plot()
# print(px+labs(title = i))

Summarises the prevalence of missing values by each column in a data frame.

Code

inspect_na(df_physio) %>% show_plot()

6.1.3 Check for missing values

(NA) in each variable and store ‘plantid’ values

Code

na_list <- lapply(df_physio, function(x) {
  plantid_with_na <- df_physio$plant_num[is.na(x)]
  return(plantid_with_na)
})

Select only harvest 2

Code

df_physio_h2=df_physio %>% 
  filter(recolte==2) %>%
  filter(plant_num!=1113) %>% 
  filter(!plant_num %in% c(87)) %>%  #filter outlier
  mutate(condition = case_when(
    condition == "Sto_WW_OT" ~ "Stocata_WW_OT",
    condition == "Sto_WS_OT" ~ "Stocata_WS_OT",
    condition == "Sto_WW_HS" ~ "Stocata_WW_HS",
    condition == "Sto_WS_HS" ~ "Stocata_WS_HS",
    condition == "Wen_WW_OT" ~ "Wendy_WW_OT",
    condition == "Wen_WS_OT" ~ "Wendy_WS_OT",
    condition == "Wen_WW_HS" ~ "Wendy_WW_HS",
    condition == "Wen_WS_HS" ~ "Wendy_WS_HS",
    TRUE ~ condition  # Keep other values unchanged
  )) %>%
  mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% 
  mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>% 
  mutate(weight_AP=weight_stem+weight_leaf) %>% 
  mutate(ratio_s_h=weight_AP/weight_root)

6.1.4 Find Outlier

Code

var_to_test=c("Hydric_potential","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass", "Trmmol","Tleaf", "Ci","PARabs","CO2S",  "PARo", 
"H2OS",  "AHs.Cs","fda",   "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow",  "Cond",  "Tair", "SVTleaf","Press", "CO2R",  "RH_S",  "Trans", "Tair_K","h2o_i", "VpdL",  "PARi",  "CndTotal","Ci_Pa", "PhiCO2","TBlk",  "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R",  "VpdA")

var_to_test=c("weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass")

num_cores <- detectCores()  # Use all the cores available on your machine
cl=parallel::makeCluster(as.integer(num_cores-1))
doParallel::registerDoParallel(cl)
results <- foreach(i = var_to_test, .packages = c("dplyr", "here"), .combine = rbind) %dopar% {
  source(here::here("src/function/stat_function/stat_analysis_main.R")) # for make plot 
  anova_factor=2
  #cat_col(i, "green")
  plot_x=stat_analyse(
    data=df_physio_h2 %>% 
      data.frame() %>%
      drop_na(all_of(i)) %>% 
      drop_na(plant_num, condition) %>% 
      #dplyr::select(genotype, condition, water_condition, heat_condition,plant_num,climat_condition, all_off(i)) %>% 
       mutate(condition = case_when(
    condition == "Stocata_WW_OT" ~ "Sto_WW_OT",
    condition == "Stocata_WS_OT" ~ "Sto_WS_OT",
    condition == "Stocata_WW_HS" ~ "Sto_WW_HS",
    condition == "Stocata_WS_HS" ~ "Sto_WS_HS",
    condition == "Wendy_WW_OT" ~ "Wen_WW_OT",
    condition == "Wendy_WS_OT" ~ "Wen_WS_OT",
    condition == "Wendy_WW_HS" ~ "Wen_WW_HS",
    condition == "Wendy_WS_HS" ~ "Wen_WS_HS",
    TRUE ~ condition  # Keep other values unchanged
  )) %>% 
      mutate(condition=factor(condition,levels=c("Sto_WW_OT","Stoc_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% 
    mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))),
    column_value = i,
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = F,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    #Ylab_i = paste0("Shoot / Root ratio"),
  control_conditions = c("WW_OT")
)
plot_x[["outliers"]]
}
parallel::stopCluster(cl)

colnames(results)

results %>% filter(is.extreme==T) %>% mutate(plant_num=as.factor(plant_num)) %>%  count("plant_num") %>% arrange(desc(freq)) 
results %>% filter(is.outlier==T) %>% mutate(plant_num=as.factor(plant_num)) %>%  count("plant_num") %>% arrange(desc(freq))

I deleted the plant numbers 1113 and 87 because they are extreme outliers for several variables.

6.1.5 Biomass representation

Code

df_physio_h2_mean=df_physio_h2%>%
  drop_na(condition,genotype,climat_condition,weight_leaf,weight_stem, weight_root) %>% 
  dplyr::select(condition,genotype,climat_condition,plant_num,weight_leaf,weight_stem, weight_root) %>% 
  pivot_longer(c(weight_leaf,weight_stem,weight_root),
               names_to="organe", values_to="biomass") %>% 
  dplyr::group_by(condition, genotype,climat_condition, organe) %>% 
  dplyr::summarise(Mean=mean(biomass),Sd=sd(biomass))%>% 
  mutate(organe = case_when(
    organe       == "weight_leaf" ~ "Leaf",
    organe       == "weight_stem" ~ "Stem",
    organe       == "weight_root" ~ "Root",
    TRUE ~ organe        # Keep other values unchanged
  )) %>% 
  pivot_wider(names_from = organe, values_from = c("Mean","Sd")) %>% 
  mutate(adj_Leaf=Mean_Leaf + Mean_Stem + Mean_Root) %>% 
  mutate(adj_Stem= Mean_Stem + Mean_Root) %>% 
  mutate(adj_Root= Mean_Root) %>% 
  pivot_longer(cols = !c(condition,genotype,climat_condition),
               names_to = c('Type','Organe'),
               names_sep = '_',
               values_to = 'value') %>% 
  pivot_wider(names_from = Type,
              values_from = value) %>% 
  dplyr::rename("Dry Biomass"=Mean) %>% 
  mutate(Organe=factor(Organe,levels=c("Leaf","Stem","Root")))

Fig1=ggplot(df_physio_h2_mean, aes(x = Organe,y=`Dry Biomass`*1000,fill=climat_condition))+geom_errorbar(aes(ymin=`Dry Biomass`*1000-Sd*1000, ymax=`Dry Biomass`*1000+Sd*1000), width=.4,position=position_dodge(.9))+geom_bar(stat="identity",position=position_dodge(),color="black")+ theme_minimal()+
  geom_text(aes(label = round(`Dry Biomass`*1000, digits = 0),y=2), color = "white",position = position_dodge(0.9),size=4,angle = 90,hjust = 'left')+
  scale_x_discrete(limits=c("Leaf", "Stem", "Root"))+ylab("Dry biomass of each organ (mg)")+
  xlab("Organe")+facet_grid(.~genotype)+scale_fill_manual(values=manu_palett)
print(Fig1)

# Add below the sd the letter
df_physio_h2_mean$letter=c(
                            "c","e","d",
                            "c","cd","cd",
                            "b","b","b",
                            "ab","a","ab",
                            "c","cd","de",
                            "c","c","c",
                            "a","ab","ab",
                            "ab","a","a"
) 

mean_letter1=df_physio_h2_mean %>%                        # Specify data frame
  group_by(condition,climat_condition,genotype) %>%       # Specify group indicator
  summarise_at(vars(`Dry Biomass`),                       # Specify column
               list(sum))

mean_letter2=df_physio_h2_mean %>%                        # Specify data frame
  group_by(condition,climat_condition,genotype) %>%
  filter(Organe=="Leaf")%>%select(Sd)

mean_letter1=cbind(mean_letter1,mean_letter2[,4])
mean_letter1$adj_tot=mean_letter1$`Dry Biomass`+mean_letter1$Sd  
#for letter  Plettre_modif(Z=data_biomass_select,Y=data_biomass_select$Node_weight+data_biomass_select$Root_weight+data_biomass_select$PA,X=data_biomass_select$Genotype,Xlab="x",Ylab = "y")
mean_letter1$letter=c("d","cd","b","ab","cd","c","ab","a")

color_organe <- c("#68a500","#d9d2b1","#ce7f50") #leaf,#stem #root

#graph biomass
Fig_biomass=ggplot(df_physio_h2_mean, aes(x = climat_condition,y=`Dry Biomass`*1000,fill=Organe))+
  geom_text(position = position_stack(vjust = 1.00),data = mean_letter1, aes(y = adj_tot*1010+Sd*1000, label = letter, fill = "Leaf"),fontface = "bold",color="#226896")+
  geom_bar(stat = "identity",color="black")+ theme_bw()+
  geom_errorbar(aes(ymin=adj*1000, ymax=adj*1000+Sd*1000),width=.4,position="identity")+
  geom_text(position = position_stack(vjust = 0.5), aes(label = letter), color = "black",size=4)+
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1700)) +  # Ensures y-axis starts at zero
  ylab("Dry biomass of each organ (mg)")+
  xlab("Condition")+
  scale_fill_manual(values=color_organe)+
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))+
  labs(fill = "Organ")+
  facet_grid(.~genotype)+
  theme(#axis.text.x = element_text(angle=45, hjust=1, vjust=1),
        legend.position = "right",
        panel.grid = element_blank(),
              panel.background = element_rect(fill = "white", colour = "black"), 
              panel.border = element_rect(fill = NA, colour = "black"), 
              strip.background = element_rect(color = "black"),
              axis.title.x=element_blank(),
              axis.text.x=element_blank(),
              axis.ticks.x=element_blank()
        )

plot_x=stat_analyse(
    data=df_physio_h2 %>% 
      data.frame() %>%  
      drop_na(ratio_s_h,plant_num, condition) %>% 
      dplyr::select(genotype, condition, water_condition, heat_condition, ratio_s_h,plant_num,climat_condition) %>% 
       mutate(condition = case_when(
    condition == "Stocata_WW_OT" ~ "Sto_WW_OT",
    condition == "Stocata_WS_OT" ~ "Sto_WS_OT",
    condition == "Stocata_WW_HS" ~ "Sto_WW_HS",
    condition == "Stocata_WS_HS" ~ "Sto_WS_HS",
    condition == "Wendy_WW_OT" ~ "Wen_WW_OT",
    condition == "Wendy_WS_OT" ~ "Wen_WS_OT",
    condition == "Wendy_WW_HS" ~ "Wen_WW_HS",
    condition == "Wendy_WS_HS" ~ "Wen_WS_HS",
    TRUE ~ condition  # Keep other values unchanged
  )) %>% 
      mutate(condition=factor(condition,levels=c("Sto_WW_OT","Stoc_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% 
    mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))),
    column_value = "ratio_s_h",
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = paste0("Shoot / Root ratio"),
  control_conditions = c("WW_OT"),
  hex_pallet = climate_pallet
)

plot_c=Fig_biomass+plot_x[["plot"]]+plot_layout(ncol = 1,heights = c(2.8, 1))+ plot_annotation(tag_levels = 'A')
plot_c=ggarrange(Fig_biomass,plot_x[["plot"]], widths = c(1, 1),heights = c(1.7, 1),ncol=1, nrow=2,labels = c("A","B"),align='v')

ggsave(here::here("report/physio/plot/biomass_ratio_h2.svg"), plot_c, height = 7.5,width = 6)

6.1.6 Multiple regression model

Code

df_physio_h2<-read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>% 
  filter(recolte==2) %>% 
  mutate(shoot_root=(weight_stem+weight_leaf)/weight_root)

contrasts(df_physio_h2$genotype) <- contr.sum # to say look at the big average only for genotype (juste an other representation)
mod1=lm(formula = shoot_root ~ water_condition*heat_condition*genotype+position+storage_line, data = df_physio_h2 %>% drop_na(shoot_root, position),contrasts = list(genotype = MASS::contr.sdif))
p_x<-ggcoef_model(mod1)
ggsave(here::here("report/physio/plot/ggcoef_model_1.svg"),p_x)

A criterion must be defined to determine the quality of a model. One of the most widely used is the Akaike Information Criterion or AIC. It is a compromise between the number of degrees of freedom (e.g. the number of coefficients in the model) we wish to minimize and the explained variance we wish to maximize (the likelihood).

The AIC resuls for this first model is 252.38

The step() function selects the best model using a top-down step-by-step procedure based on AIC minimization. The function displays the various selection steps on the screen and returns the final model.

The AIC resuls for this second model is 246.72

Display performance indicators and the regression results

Code

# mod2 |> performance::model_performance()
performance::compare_performance(mod1, mod2)
mod2 |> 
  tbl_regression() |> 
  bold_labels() |> 
  add_glance_source_note()

Comparison of the two models

Code

ggstats::ggcoef_compare(
  list(mod1,mod2),
  tidy_fun = broom.helpers::tidy_marginal_predictions,
  type = "dodge",
  vline = FALSE
)

# Draft
df_physio_h2 %>%
  dplyr::select(plant_num,water_condition, heat_condition, genotype, sum_biomass) %>% 
  group_by(water_condition, heat_condition, genotype) %>%
  identify_outliers(sum_biomass)

df_without_outlier<-df_physio_h2 %>% 
  filter(!plant_num%in% c(143,1063,1091,1129)) %>% 
  drop_na(sum_biomass)

m_out<-lm(data=df_without_outlier,sum_biomass~genotype*heat_condition*water_condition)   

ggqqplot(residuals(m_out))
shapiro_test(residuals(m_out))

ggqqplot(df_physio_h2, "sum_biomass", ggtheme = theme_bw()) +
  facet_grid(water_condition + heat_condition ~ genotype, labeller = "label_both")

df_physio_h2 %>% levene_test(sum_biomass ~ water_condition*heat_condition*genotype)
summary(mod2)

res.aov <- df_physio_h2 %>% anova_test(sum_biomass ~ water_condition*heat_condition*genotype)
res.aov

res.aov2 <- df_physio_h2 %>% welch_anova_test(sum_biomass ~ condition)
# Pairwise comparisons (Games-Howell)
pwc2 <- df_physio_h2 %>% games_howell_test(sum_biomass ~ condition)

# Visualization: Boxplots with p-values
mod=rlm(data=df_without_outlier,sum_biomass~heat_condition*water_condition)
summary(mod)
anova(mod)

hist(df_without_outlier$sum_biomass, main = "Histogram of my data", xlab = "Valeurs", ylab = "Frequency", col = "skyblue", border = "white")

test=df_physio_h2 %>% tukey_hsd(sum_biomass ~ genotype*water_condition*heat_condition)

6.1.7 Contribution of each variable related to biomass

Code

df_physio_h2_v<-df_physio_h2 %>% 
  dplyr::select(plant_num,genotype,heat_condition,water_condition,sum_biomass,weight_leaf, weight_stem, weight_root,ratio_s_h,"leaf_area") %>% 
  pivot_longer(-c(plant_num,genotype,heat_condition,water_condition),names_to = "variable")

compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8))
for (i in 1:length(levels(as.factor(df_physio_h2_v$variable)))){
  variable_l_i=as.character(levels(as.factor(df_physio_h2_v$variable))[i])
  cat( "Variable:" ,variable_l_i, "\n")
  
  df_x= df_physio_h2_v  %>% 
    filter(variable==variable_l_i) %>% 
    drop_na(value)
  
  formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = "")))
  
  # Perform ANOVA using the aov function
  result_variable <- aov(data = df_x, formula = formula_string)
  anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`))
  
  row_name<-rownames(anova_result)
  row_name <- gsub("genotype","Genotype",row_name)
  row_name <- gsub("water_condition","Water",row_name)
  row_name <- gsub("heat_condition","Heat",row_name)
  rownames(anova_result)<-row_name
  anova_result$variable=as.character(df_x$variable)[1]
  compile_result=rbind(compile_result,anova_result)
}

compile_result<-compile_result %>% 
  as.data.frame() %>% 
  rownames_to_column("contribution_variable") %>% 
  mutate(Significance = case_when(
    `Pr(>F)` <= 0.001 ~ '***',
    `Pr(>F)` < 0.01  ~ '**',
    `Pr(>F)` < 0.05  ~ '*',
    `Pr(>F)` < 0.1   ~ '.',
    TRUE            ~ ' '
  ))

compile_result=compile_result%>% 
  filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% 
  filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% 
  mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% 
  mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>%
  mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% 
  mutate(contribution_variable = as.character(contribution_variable)) %>% 
  mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% 
  mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% 
  mutate(text_output=paste0(round(R2*100,0), "% ", Significance))

plot_contrib<-ggplot(compile_result, aes(x = variable, y = R2, fill = contribution_variable)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+
  scale_fill_manual(values = c("#ffd166", # geno
                               "#118ab2", # water
                               "#ef476f", # heat
                               "#06d6a0", # watergeno
                               "#f78c6b", # genoheat
                               "#FFB6C1", # waterheat
                               "#333333"),
                    name = "title of legend (contribution") +# all
  scale_y_continuous(labels = scales::percent_format())+
  scale_color_identity()+
  labs(x = "Compartment",
       y = "The relative contribution for the different variables (%)",
       ) +
  theme_minimal()+
  theme(panel.grid = element_blank(),
        legend.title = element_blank(), # for delete title
        axis.text.x = element_text(angle = 45,hjust = 1, vjust = 1),
        axis.title.x = element_blank()
  )

ggsave(here::here("report/physio/plot/contribution.svg"),plot_contrib)

6.1.8 Student test

Code

test=read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>%
  filter(recolte==2) %>% 
  drop_na(Hydric_potential) %>%
  #filter(climat_condition=="WW_OT") %>% 
  t_test(Hydric_potential ~ condition,p.adjust.method = "BH") %>%
  add_significance()

Code

df_physio_s=read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>%
  filter(recolte==2) %>% 
  drop_na(Hydric_potential) %>% 
  #filter(genotype=="Stocata") %>% 
  #filter(climat_condition%in% c("WW_OT","WS_HS")) %>% 
  dplyr::group_by(condition) %>% 
  dplyr::summarise(Mean=mean(Hydric_potential))

(df_physio_s$Mean[1]-df_physio_s$Mean[2])/df_physio_s$Mean[2]

6.2 For the data related to licor or leaf water pottential

6.2.1 Data importation

Code

df_physio_other<-read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>% 
  dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte", #"paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% 
#   dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte",
"Hydric_potential", "Trmmol","Tleaf", "Ci","PARabs","CO2S",  "PARo", "H2OS",  "AHs.Cs","fda",   "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow",  "Cond",  "Tair", "SVTleaf","Press", "CO2R",  "RH_S",  "Trans", "Tair_K","h2o_i", "VpdL",  "PARi",  "CndTotal","Ci_Pa", "PhiCO2","TBlk",  "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R",  "VpdA") %>% 
  drop_na(condition,recolte) %>% 
  mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS")))

6.2.2 Fast data visualization

Code

x <- inspect_cat(df_physio_other)
x %>%  show_plot()

Code

#inspect_cor() returns a tibble containing Pearson’s correlation coefficient, confidence intervals and p-values for pairs of numeric columns. The function combines the functionality of cor() and cor.test() in a more convenient wrapper.

#To plot the comparison of the top 20 correlation coefficients for each condition (not show):
# px<-inspect_cor(df_physio %>%
# #                  filter(condition==i) %>%
#                   filter(recolte==2)# %>% 
#                   #dplyr::select(starts_with("weight"))
#                 ) %>% 
#   slice(1:40) %>% show_plot()
# print(px+labs(title = i))

Summarises the prevalence of missing values by each column in a data frame.

Code

inspect_na(df_physio_other) %>% show_plot()

6.2.3 Check for missing values

Code

na_list <- lapply(df_physio_other, function(x) {
  plantid_with_na <- df_physio_other$plant_num[is.na(x)]
  return(plantid_with_na)
})

Select only harvest 2

Code

df_physio_other_h2=df_physio_other %>% 
  filter(recolte==2) %>%
  filter(plant_num!=1113) %>% 
  filter(!plant_num %in% c(87)) %>%  #filter outlier
  mutate(condition = case_when(
    condition == "Sto_WW_OT" ~ "Stocata_WW_OT",
    condition == "Sto_WS_OT" ~ "Stocata_WS_OT",
    condition == "Sto_WW_HS" ~ "Stocata_WW_HS",
    condition == "Sto_WS_HS" ~ "Stocata_WS_HS",
    condition == "Wen_WW_OT" ~ "Wendy_WW_OT",
    condition == "Wen_WS_OT" ~ "Wendy_WS_OT",
    condition == "Wen_WW_HS" ~ "Wendy_WW_HS",
    condition == "Wen_WS_HS" ~ "Wendy_WS_HS",
    TRUE ~ condition  # Keep other values unchanged
  )) %>%
  mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% 
  mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS")))

6.2.4 Contribution of each variable related to other variable

Code

df_physio_h2_v=df_physio_other_h2 %>%
  dplyr::select(-c(acronyme_analyse,position, storage_line, unit, analyse_by_plant, recolte,
                   Flow,fda,Press, CO2R,RH_R)) %>%  # not interesting data measured by lycor
  pivot_longer(-c(plant_num,genotype,heat_condition,water_condition,condition,climat_condition),names_to = "variable",values_to = "value")

compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8))
for (i in 1:length(levels(as.factor(df_physio_h2_v$variable)))){
  variable_l_i=as.character(levels(as.factor(df_physio_h2_v$variable))[i])
  cat( "Variable:" ,variable_l_i, "\n")
  
  df_x= df_physio_h2_v  %>% 
    filter(variable==variable_l_i) %>% 
    drop_na(value)
  
  formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = "")))
  
  # Carry out ANOVA using the aov function
  result_variable <- aov(data = df_x, formula = formula_string)
  anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`))
  
  row_name<-rownames(anova_result)
  row_name <- gsub("genotype","Genotype",row_name)
  row_name <- gsub("water_condition","Water",row_name)
  row_name <- gsub("heat_condition","Heat",row_name)
  rownames(anova_result)<-row_name
  anova_result$variable=as.character(df_x$variable)[1]
  compile_result=rbind(compile_result,anova_result)
}

compile_result<-compile_result %>% 
  as.data.frame() %>% 
  rownames_to_column("contribution_variable") %>% 
  mutate(Significance = case_when(
    `Pr(>F)` <= 0.001 ~ '***',
    `Pr(>F)` < 0.01  ~ '**',
    `Pr(>F)` < 0.05  ~ '*',
    `Pr(>F)` < 0.1   ~ '.',
    TRUE            ~ ' '
  ))

compile_result=compile_result%>% 
  filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% 
  filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% 
  mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% 
  mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>%
  mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% 
  mutate(contribution_variable = as.character(contribution_variable)) %>% 
  mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% 
  mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% 
  mutate(text_output=paste0(round(R2*100,0), "% ", Significance))

plot_contrib<-ggplot(compile_result, aes(x = variable, y = R2, fill = contribution_variable)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+
  scale_fill_manual(values = c("#ffd166", # geno
                               "#118ab2", # water
                               "#ef476f", # heat
                               "#06d6a0", # watergeno
                               "#f78c6b", # genoheat
                               "#FFB6C1", # waterheat
                               "#333333"),
                    name = "title of legend (contribution") +# all
  scale_y_continuous(labels = scales::percent_format())+
  scale_color_identity()+
  labs(x = "Compartment",
       y = "The relative contribution for the different variables (%)",
       ) +
  theme_minimal()+
  theme(panel.grid = element_blank(),
        legend.title = element_blank(), # for delete title
        axis.text.x = element_text(angle = 45,hjust = 1, vjust = 1),
        axis.title.x = element_blank()
  )+
  coord_flip(); plot_contrib

ggsave(here::here("report/physio/plot/contribution_other.svg"),plot_contrib, height = 16,width = 10)

6.2.5 Summary of some interesting variables in ecophysiology

6.2.5.1 Plot

Code

df_evapo_H2 <- read.csv(here::here("data/water/weighing_watering_output/sum_evapotranspi.csv"))[-1] %>% 
  dplyr::rename(plant_num = plant_id) %>% 
  mutate(sum_tot_evapo_plant = sum_total_evapotranspiration/2) %>% 
  filter(recolte == 2) %>% 
  dplyr::select(plant_num, sum_tot_evapo_plant)

df_physio_other_H2<-read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  left_join(.,df_evapo_H2, by = "plant_num") %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>% 
  mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% 
  mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>%  
  mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>%
#   dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte", #"paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% 
# #   dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position"   ,"storage_line"     ,"unit"  ,"analyse_by_plant", "recolte",
# "Hydric_potential", "Trmmol","Tleaf", "Ci","PARabs","CO2S",  "PARo", "H2OS",  "AHs.Cs","fda",   "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow",  "Cond",  "Tair", "SVTleaf","Press", "CO2R",  "RH_S",  "Trans", "Tair_K","h2o_i", "VpdL",  "PARi",  "CndTotal","Ci_Pa", "PhiCO2","TBlk",  "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R",  "VpdA") %>% 
  drop_na(condition,recolte) %>% 
  filter(recolte==2) 

Fig_evapo=stat_analyse(
    data=df_physio_other_H2 %>% 
      drop_na(climat_condition,sum_tot_evapo_plant),
    column_value = "sum_tot_evapo_plant",
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = "Total evapotranspiration (mL)",
  control_conditions = c("WW_OT"),
  strip_normale = F,
  hex_pallet = climate_pallet
)

Fig_tleaf=stat_analyse(
    data=df_physio_other_H2 %>% 
      drop_na(climat_condition,Tleaf),
    column_value = "Tleaf",
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = "Leaf temperature (°C)",
  control_conditions = c("WW_OT"),
  strip_normale = F,
  hex_pallet = climate_pallet
)

Fig_photo=stat_analyse(
    data=df_physio_other_H2 %>% 
      drop_na(climat_condition,Photo),
    column_value = "Photo",
    category_variables = c("climat_condition"), 
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = expression("Photosynthesis" ~ mu * mol ~ CO[2] ~ m^-2 ~ s^-1),
  control_conditions = c("WW_OT"),
  strip_normale = F,
  hex_pallet = climate_pallet
)

Fig_SLA=stat_analyse(
    data=df_physio_other_H2 %>% 
      drop_na(climat_condition,SLA),
    column_value = "SLA", 
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = "Specific leaf area (m²/kg)",
  control_conditions = c("WW_OT"),
  strip_normale = F,
  hex_pallet = climate_pallet
)

Fig_LWP=stat_analyse(
    data=df_physio_other_H2 %>% 
      drop_na(climat_condition,Hydric_potential),
    column_value = "Hydric_potential",
    category_variables = c("climat_condition"),
    grp_var = "genotype",
    show_plot = T,
    outlier_show = F, 
    label_outlier = "plant_num",
    biologist_stats = T,
    Ylab_i = "Leaf water potential (Mpa)",
  control_conditions = c("WW_OT"),
  strip_normale = F,
  hex_pallet = climate_pallet
)

px=ggarrange(
  Fig_evapo[["plot"]]+labs(color="Treatment",fill="Treatment"),
  Fig_tleaf[["plot"]]+labs(color="Treatment",fill="Treatment"),
  Fig_photo[["plot"]]+labs(color="Treatment",fill="Treatment"),
  #Fig_SLA[["plot"]]+labs(color="Treatment",fill="Treatment"),
  Fig_LWP[["plot"]]+labs(color="Treatment",fill="Treatment"),
  ncol=4, nrow=1,common.legend = TRUE, legend="right",
  #labels = c("A","B","C", "D", "F"),
  align='h')
px

#export to delet
png(here::here("report/physio/plot/evapo_tleaf_lwp.png"), width = 30, height = 8, units = 'cm', res = 900)
grid.arrange (px) # Make plot
dev.off()

# test=df_physio_other_H2 %>%drop_na(SLA) %>% dplyr::select(plant_num,condition,leaf_scan,planimeter_leaf, leaf_area,weight_leaf,SLA) %>% mutate(scan=ifelse(is.na(planimeter_leaf),"scan","planimeter")) %>% mutate(cond_type=paste0(condition,"_",scan)) %>% 
# ggplot(aes(x=cond_type,y=leaf_area))+geom_boxplot()

6.2.5.2 Contribution

Code

df_evapo_H2 <- read.csv(here::here("data/water/weighing_watering_output/sum_evapotranspi.csv"))[-1] %>% 
  dplyr::rename(plant_num = plant_id) %>% 
  mutate(sum_tot_evapo_plant = sum_total_evapotranspiration/2) %>% 
  filter(recolte == 2) %>% 
  dplyr::select(plant_num, sum_tot_evapo_plant)

df_physio_other_H2_v<-read.csv2(here::here("data/physio/global_physio.csv")) %>% 
  left_join(.,df_evapo_H2, by = "plant_num") %>% 
  mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% 
  mutate(across(c("storage_line","position"), as.integer)) %>% 
  mutate(water_condition=relevel(water_condition, "WW")) %>% 
  mutate(heat_condition=relevel(heat_condition, "OT")) %>% 
  mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% 
  mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>%  
  mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>%
  drop_na(condition,recolte) %>% 
  filter(recolte==2) %>% 
  dplyr::select(water_condition, heat_condition, genotype, plant_num, condition, climat_condition, sum_tot_evapo_plant, Tleaf,Photo,#SLA,
                Hydric_potential) %>% 
  pivot_longer(-c(plant_num,genotype,heat_condition,water_condition,condition,climat_condition),names_to = "variable",values_to = "value") %>% 
  mutate(variable = case_when(
    variable == "sum_tot_evapo_plant" ~ "Total evapotranspiration",
    variable == "Tleaf" ~ "Leaf temperature",
    variable == "Photo" ~ "Photosynthesis",
    variable == "SLA" ~ "Specific leaf area",
    variable == "Hydric_potential" ~ "Leaf water potential",
    TRUE ~ variable  
    )
  ) 



compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8))
for (i in 1:length(levels(as.factor(df_physio_other_H2_v$variable)))){
  variable_l_i=as.character(levels(as.factor(df_physio_other_H2_v$variable))[i])
  cat( "Variable:" ,variable_l_i, "\n")
  
  df_x= df_physio_other_H2_v  %>% 
    filter(variable==variable_l_i) %>% 
    drop_na(value)
  
  formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = "")))
  
  # Carry out ANOVA using the aov function
  result_variable <- aov(data = df_x, formula = formula_string)
  anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`))
  
  row_name<-rownames(anova_result)
  row_name <- gsub("genotype","Genotype",row_name)
  row_name <- gsub("water_condition","Water",row_name)
  row_name <- gsub("heat_condition","Heat",row_name)
  rownames(anova_result)<-row_name
  anova_result$variable=as.character(df_x$variable)[1]
  compile_result=rbind(compile_result,anova_result)
}

compile_result<-compile_result %>% 
  as.data.frame() %>% 
  rownames_to_column("contribution_variable") %>% 
  mutate(Significance = case_when(
    `Pr(>F)` <= 0.001 ~ '***',
    `Pr(>F)` < 0.01  ~ '**',
    `Pr(>F)` < 0.05  ~ '*',
    `Pr(>F)` < 0.1   ~ '.',
    TRUE            ~ ' '
  ))
library(forcats)
compile_result=compile_result%>% 
  filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% 
  filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% 
  mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% 
  mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>%
  mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% 
  mutate(contribution_variable = as.character(contribution_variable)) %>% 
  mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% 
  mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% 
  mutate(text_output=paste0(round(R2*100,0), "% ", Significance)) %>% 
  mutate(variable=factor(variable,levels=c("Total evapotranspiration","Leaf temperature","Photosynthesis","Leaf water potential"))) %>% arrange(desc(variable))

plot_contrib<-ggplot(compile_result, aes(x = fct_inorder(variable), y = R2, fill = contribution_variable)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+
  scale_fill_manual(values = c("#ffd166", # geno
                               "#118ab2", # water
                               "#ef476f", # heat
                               "#06d6a0", # watergeno
                               "#f78c6b", # genoheat
                               "#FFB6C1", # wate rheat
                               "#333333"),
                    name = "title of legend (contribution") +# all
  scale_y_continuous(labels = scales::percent_format())+
  scale_color_identity()+
  labs(x = "Variable",
       y = "The relative contribution for the different variables (%)",
       ) +
  theme_classic()+
  theme(panel.grid = element_blank(),
        legend.title = element_blank(), # for delete title
        axis.text.x = element_text(angle = 0,hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        axis.line = element_line(size = 0.3)
  )+
  coord_flip(); plot_contrib

ggsave(here::here("report/physio/plot/contrib_physio_interesting_var.svg"),plot_contrib, height = 2,width = 11)
ggsave(here::here("report/physio/plot/contrib_physio_interesting_var.png"),plot_contrib, height = 2,width = 11, dpi = 600)

--- output: html_document editor_options: chunk_output_type: console --- ## Rapid analysis of ecophysiological data {#sec-physio_resum} ```{r setup,message=FALSE, warning=FALSE,echo=T} # Pkg ----- #package library(ggplot2) library(plotly) library(readxl) library(tidyverse) library(car) library(Rmisc) library(multcomp) library(lubridate) library(dplyr) library(scales) library(hms) library(tictoc) library(multcompView) library(gridExtra) library(kableExtra) library(cowplot) library(kableExtra) library(ggpubr) library(labelled) library(gtsummary) library(ggstats) library(performance) library(inspectdf) library(patchwork) library(gridExtra) library(ggpubr) library(foreach) library(parallel) library(MASS) library(rstatix) #function source(here::here("src/function/stat_function/stat_analysis_main.R")) # for make plot source(here::here("src/function/graph_function.R")) set.seed(1) # cosmetics climate_pallet=read_excel(here::here("data/color_palette.xlsm")) %>% filter(set == "climat_condition") %>% dplyr::select(color, treatment) %>% pull(color) %>% setNames(read_excel(here::here("data/color_palette.xlsm")) %>% filter(set == "climat_condition") %>% pull(treatment) ) ``` ## For data related to biomass ### Data importation ```{r} df_physio<-read.csv2(here::here("data/physio/global_physio.csv")) %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", "paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% # dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", "paire","Hydric_potential","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass", "Trmmol","Tleaf", "Ci","PARabs","CO2S", "PARo", # "H2OS", "AHs.Cs","fda", "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow", "Cond", "Tair", "SVTleaf","Press", "CO2R", "RH_S", "Trans", "Tair_K","h2o_i", "VpdL", "PARi", "CndTotal","Ci_Pa", "PhiCO2","TBlk", "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R", "VpdA") %>% drop_na(condition,recolte) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) ``` ### Fast data visualization ```{r} x <- inspect_cat(df_physio) x %>% show_plot() #inspect_cor() returns a tibble containing Pearson’s correlation coefficient, confidence intervals and p-values for pairs of numeric columns. The function combines the functionality of cor() and cor.test() in a more convenient wrapper. #To plot the comparison of the top 20 correlation coefficients for each condition (not show): # px<-inspect_cor(df_physio %>% # # filter(condition==i) %>% # filter(recolte==2)# %>% # #dplyr::select(starts_with("weight")) # ) %>% # slice(1:40) %>% show_plot() # print(px+labs(title = i)) ``` Summarises the prevalence of missing values by each column in a data frame. ```{r} inspect_na(df_physio) %>% show_plot() ``` ### Check for missing values (NA) in each variable and store 'plantid' values ```{r} na_list <- lapply(df_physio, function(x) { plantid_with_na <- df_physio$plant_num[is.na(x)] return(plantid_with_na) }) ``` *Select only harvest 2* ```{r, eval=T} df_physio_h2=df_physio %>% filter(recolte==2) %>% filter(plant_num!=1113) %>% filter(!plant_num %in% c(87)) %>% #filter outlier mutate(condition = case_when( condition == "Sto_WW_OT" ~ "Stocata_WW_OT", condition == "Sto_WS_OT" ~ "Stocata_WS_OT", condition == "Sto_WW_HS" ~ "Stocata_WW_HS", condition == "Sto_WS_HS" ~ "Stocata_WS_HS", condition == "Wen_WW_OT" ~ "Wendy_WW_OT", condition == "Wen_WS_OT" ~ "Wendy_WS_OT", condition == "Wen_WW_HS" ~ "Wendy_WW_HS", condition == "Wen_WS_HS" ~ "Wendy_WS_HS", TRUE ~ condition # Keep other values unchanged )) %>% mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>% mutate(weight_AP=weight_stem+weight_leaf) %>% mutate(ratio_s_h=weight_AP/weight_root) ``` ### Find Outlier ```{r, eval=F} var_to_test=c("Hydric_potential","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass", "Trmmol","Tleaf", "Ci","PARabs","CO2S", "PARo", "H2OS", "AHs.Cs","fda", "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow", "Cond", "Tair", "SVTleaf","Press", "CO2R", "RH_S", "Trans", "Tair_K","h2o_i", "VpdL", "PARi", "CndTotal","Ci_Pa", "PhiCO2","TBlk", "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R", "VpdA") var_to_test=c("weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") num_cores <- detectCores() # Use all the cores available on your machine cl=parallel::makeCluster(as.integer(num_cores-1)) doParallel::registerDoParallel(cl) results <- foreach(i = var_to_test, .packages = c("dplyr", "here"), .combine = rbind) %dopar% { source(here::here("src/function/stat_function/stat_analysis_main.R")) # for make plot anova_factor=2 #cat_col(i, "green") plot_x=stat_analyse( data=df_physio_h2 %>% data.frame() %>% drop_na(all_of(i)) %>% drop_na(plant_num, condition) %>% #dplyr::select(genotype, condition, water_condition, heat_condition,plant_num,climat_condition, all_off(i)) %>% mutate(condition = case_when( condition == "Stocata_WW_OT" ~ "Sto_WW_OT", condition == "Stocata_WS_OT" ~ "Sto_WS_OT", condition == "Stocata_WW_HS" ~ "Sto_WW_HS", condition == "Stocata_WS_HS" ~ "Sto_WS_HS", condition == "Wendy_WW_OT" ~ "Wen_WW_OT", condition == "Wendy_WS_OT" ~ "Wen_WS_OT", condition == "Wendy_WW_HS" ~ "Wen_WW_HS", condition == "Wendy_WS_HS" ~ "Wen_WS_HS", TRUE ~ condition # Keep other values unchanged )) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Stoc_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))), column_value = i, category_variables = c("climat_condition"), grp_var = "genotype", show_plot = F, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, #Ylab_i = paste0("Shoot / Root ratio"), control_conditions = c("WW_OT") ) plot_x[["outliers"]] } parallel::stopCluster(cl) colnames(results) results %>% filter(is.extreme==T) %>% mutate(plant_num=as.factor(plant_num)) %>% count("plant_num") %>% arrange(desc(freq)) results %>% filter(is.outlier==T) %>% mutate(plant_num=as.factor(plant_num)) %>% count("plant_num") %>% arrange(desc(freq)) ``` ::: callout-important ## I deleted the plant numbers 1113 and 87 because they are extreme outliers for several variables. ::: ### Biomass representation ```{r,eval=F} df_physio_h2_mean=df_physio_h2%>% drop_na(condition,genotype,climat_condition,weight_leaf,weight_stem, weight_root) %>% dplyr::select(condition,genotype,climat_condition,plant_num,weight_leaf,weight_stem, weight_root) %>% pivot_longer(c(weight_leaf,weight_stem,weight_root), names_to="organe", values_to="biomass") %>% dplyr::group_by(condition, genotype,climat_condition, organe) %>% dplyr::summarise(Mean=mean(biomass),Sd=sd(biomass))%>% mutate(organe = case_when( organe == "weight_leaf" ~ "Leaf", organe == "weight_stem" ~ "Stem", organe == "weight_root" ~ "Root", TRUE ~ organe # Keep other values unchanged )) %>% pivot_wider(names_from = organe, values_from = c("Mean","Sd")) %>% mutate(adj_Leaf=Mean_Leaf + Mean_Stem + Mean_Root) %>% mutate(adj_Stem= Mean_Stem + Mean_Root) %>% mutate(adj_Root= Mean_Root) %>% pivot_longer(cols = !c(condition,genotype,climat_condition), names_to = c('Type','Organe'), names_sep = '_', values_to = 'value') %>% pivot_wider(names_from = Type, values_from = value) %>% dplyr::rename("Dry Biomass"=Mean) %>% mutate(Organe=factor(Organe,levels=c("Leaf","Stem","Root"))) Fig1=ggplot(df_physio_h2_mean, aes(x = Organe,y=`Dry Biomass`*1000,fill=climat_condition))+geom_errorbar(aes(ymin=`Dry Biomass`*1000-Sd*1000, ymax=`Dry Biomass`*1000+Sd*1000), width=.4,position=position_dodge(.9))+geom_bar(stat="identity",position=position_dodge(),color="black")+ theme_minimal()+ geom_text(aes(label = round(`Dry Biomass`*1000, digits = 0),y=2), color = "white",position = position_dodge(0.9),size=4,angle = 90,hjust = 'left')+ scale_x_discrete(limits=c("Leaf", "Stem", "Root"))+ylab("Dry biomass of each organ (mg)")+ xlab("Organe")+facet_grid(.~genotype)+scale_fill_manual(values=manu_palett) print(Fig1) # Add below the sd the letter df_physio_h2_mean$letter=c( "c","e","d", "c","cd","cd", "b","b","b", "ab","a","ab", "c","cd","de", "c","c","c", "a","ab","ab", "ab","a","a" ) mean_letter1=df_physio_h2_mean %>% # Specify data frame group_by(condition,climat_condition,genotype) %>% # Specify group indicator summarise_at(vars(`Dry Biomass`), # Specify column list(sum)) mean_letter2=df_physio_h2_mean %>% # Specify data frame group_by(condition,climat_condition,genotype) %>% filter(Organe=="Leaf")%>%select(Sd) mean_letter1=cbind(mean_letter1,mean_letter2[,4]) mean_letter1$adj_tot=mean_letter1$`Dry Biomass`+mean_letter1$Sd #for letter Plettre_modif(Z=data_biomass_select,Y=data_biomass_select$Node_weight+data_biomass_select$Root_weight+data_biomass_select$PA,X=data_biomass_select$Genotype,Xlab="x",Ylab = "y") mean_letter1$letter=c("d","cd","b","ab","cd","c","ab","a") color_organe <- c("#68a500","#d9d2b1","#ce7f50") #leaf,#stem #root #graph biomass Fig_biomass=ggplot(df_physio_h2_mean, aes(x = climat_condition,y=`Dry Biomass`*1000,fill=Organe))+ geom_text(position = position_stack(vjust = 1.00),data = mean_letter1, aes(y = adj_tot*1010+Sd*1000, label = letter, fill = "Leaf"),fontface = "bold",color="#226896")+ geom_bar(stat = "identity",color="black")+ theme_bw()+ geom_errorbar(aes(ymin=adj*1000, ymax=adj*1000+Sd*1000),width=.4,position="identity")+ geom_text(position = position_stack(vjust = 0.5), aes(label = letter), color = "black",size=4)+ scale_y_continuous(expand = c(0, 0), limits = c(0, 1700)) + # Ensures y-axis starts at zero ylab("Dry biomass of each organ (mg)")+ xlab("Condition")+ scale_fill_manual(values=color_organe)+ theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))+ labs(fill = "Organ")+ facet_grid(.~genotype)+ theme(#axis.text.x = element_text(angle=45, hjust=1, vjust=1), legend.position = "right", panel.grid = element_blank(), panel.background = element_rect(fill = "white", colour = "black"), panel.border = element_rect(fill = NA, colour = "black"), strip.background = element_rect(color = "black"), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank() ) plot_x=stat_analyse( data=df_physio_h2 %>% data.frame() %>% drop_na(ratio_s_h,plant_num, condition) %>% dplyr::select(genotype, condition, water_condition, heat_condition, ratio_s_h,plant_num,climat_condition) %>% mutate(condition = case_when( condition == "Stocata_WW_OT" ~ "Sto_WW_OT", condition == "Stocata_WS_OT" ~ "Sto_WS_OT", condition == "Stocata_WW_HS" ~ "Sto_WW_HS", condition == "Stocata_WS_HS" ~ "Sto_WS_HS", condition == "Wendy_WW_OT" ~ "Wen_WW_OT", condition == "Wendy_WS_OT" ~ "Wen_WS_OT", condition == "Wendy_WW_HS" ~ "Wen_WW_HS", condition == "Wendy_WS_HS" ~ "Wen_WS_HS", TRUE ~ condition # Keep other values unchanged )) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Stoc_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))), column_value = "ratio_s_h", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = paste0("Shoot / Root ratio"), control_conditions = c("WW_OT"), hex_pallet = climate_pallet ) plot_c=Fig_biomass+plot_x[["plot"]]+plot_layout(ncol = 1,heights = c(2.8, 1))+ plot_annotation(tag_levels = 'A') plot_c=ggarrange(Fig_biomass,plot_x[["plot"]], widths = c(1, 1),heights = c(1.7, 1),ncol=1, nrow=2,labels = c("A","B"),align='v') ggsave(here::here("report/physio/plot/biomass_ratio_h2.svg"), plot_c, height = 7.5,width = 6) ``` ![](plot/biomass_ratio_h2.svg) ### Multiple regression model ```{r,eval=T} df_physio_h2<-read.csv2(here::here("data/physio/global_physio.csv")) %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% filter(recolte==2) %>% mutate(shoot_root=(weight_stem+weight_leaf)/weight_root) contrasts(df_physio_h2$genotype) <- contr.sum # to say look at the big average only for genotype (juste an other representation) mod1=lm(formula = shoot_root ~ water_condition*heat_condition*genotype+position+storage_line, data = df_physio_h2 %>% drop_na(shoot_root, position),contrasts = list(genotype = MASS::contr.sdif)) p_x<-ggcoef_model(mod1) ggsave(here::here("report/physio/plot/ggcoef_model_1.svg"),p_x) ``` A criterion must be defined to determine the quality of a model. One of the most widely used is the Akaike Information Criterion or AIC. It is a compromise between the number of degrees of freedom (e.g. the number of coefficients in the model) we wish to minimize and the explained variance we wish to maximize (the likelihood). The AIC resuls for this first model is **`r round(AIC(mod1),2)`** The step() function selects the best model using a top-down step-by-step procedure based on AIC minimization. The function displays the various selection steps on the screen and returns the final model. ```{r, warning=FALSE, message=FALSE,echo=F,include = FALSE} mod2 <- step(mod1) p_x<-ggcoef_model(mod2) ggsave(here::here("report/physio/plot/ggcoef_model_2.svg"),p_x) ``` The AIC resuls for this second model is **`r round(AIC(mod2),2)`** ::: {style="display: flex; justify-content: space-between;"} ![](plot/ggcoef_model_1.svg) ![](plot/ggcoef_model_2.svg) ::: Display performance indicators and the regression results ```{r, eval=F} # mod2 |> performance::model_performance() performance::compare_performance(mod1, mod2) mod2 |> tbl_regression() |> bold_labels() |> add_glance_source_note() ``` Comparison of the two models ```{r, eval=F} ggstats::ggcoef_compare( list(mod1,mod2), tidy_fun = broom.helpers::tidy_marginal_predictions, type = "dodge", vline = FALSE ) # Draft df_physio_h2 %>% dplyr::select(plant_num,water_condition, heat_condition, genotype, sum_biomass) %>% group_by(water_condition, heat_condition, genotype) %>% identify_outliers(sum_biomass) df_without_outlier<-df_physio_h2 %>% filter(!plant_num%in% c(143,1063,1091,1129)) %>% drop_na(sum_biomass) m_out<-lm(data=df_without_outlier,sum_biomass~genotype*heat_condition*water_condition) ggqqplot(residuals(m_out)) shapiro_test(residuals(m_out)) ggqqplot(df_physio_h2, "sum_biomass", ggtheme = theme_bw()) + facet_grid(water_condition + heat_condition ~ genotype, labeller = "label_both") df_physio_h2 %>% levene_test(sum_biomass ~ water_condition*heat_condition*genotype) summary(mod2) res.aov <- df_physio_h2 %>% anova_test(sum_biomass ~ water_condition*heat_condition*genotype) res.aov res.aov2 <- df_physio_h2 %>% welch_anova_test(sum_biomass ~ condition) # Pairwise comparisons (Games-Howell) pwc2 <- df_physio_h2 %>% games_howell_test(sum_biomass ~ condition) # Visualization: Boxplots with p-values mod=rlm(data=df_without_outlier,sum_biomass~heat_condition*water_condition) summary(mod) anova(mod) hist(df_without_outlier$sum_biomass, main = "Histogram of my data", xlab = "Valeurs", ylab = "Frequency", col = "skyblue", border = "white") test=df_physio_h2 %>% tukey_hsd(sum_biomass ~ genotype*water_condition*heat_condition) ``` ### Contribution of each variable related to biomass ```{r, eval=F} df_physio_h2_v<-df_physio_h2 %>% dplyr::select(plant_num,genotype,heat_condition,water_condition,sum_biomass,weight_leaf, weight_stem, weight_root,ratio_s_h,"leaf_area") %>% pivot_longer(-c(plant_num,genotype,heat_condition,water_condition),names_to = "variable") compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8)) for (i in 1:length(levels(as.factor(df_physio_h2_v$variable)))){ variable_l_i=as.character(levels(as.factor(df_physio_h2_v$variable))[i]) cat( "Variable:" ,variable_l_i, "\n") df_x= df_physio_h2_v %>% filter(variable==variable_l_i) %>% drop_na(value) formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = ""))) # Perform ANOVA using the aov function result_variable <- aov(data = df_x, formula = formula_string) anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`)) row_name<-rownames(anova_result) row_name <- gsub("genotype","Genotype",row_name) row_name <- gsub("water_condition","Water",row_name) row_name <- gsub("heat_condition","Heat",row_name) rownames(anova_result)<-row_name anova_result$variable=as.character(df_x$variable)[1] compile_result=rbind(compile_result,anova_result) } compile_result<-compile_result %>% as.data.frame() %>% rownames_to_column("contribution_variable") %>% mutate(Significance = case_when( `Pr(>F)` <= 0.001 ~ '***', `Pr(>F)` < 0.01 ~ '**', `Pr(>F)` < 0.05 ~ '*', `Pr(>F)` < 0.1 ~ '.', TRUE ~ ' ' )) compile_result=compile_result%>% filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>% mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% mutate(contribution_variable = as.character(contribution_variable)) %>% mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% mutate(text_output=paste0(round(R2*100,0), "% ", Significance)) plot_contrib<-ggplot(compile_result, aes(x = variable, y = R2, fill = contribution_variable)) + geom_bar(stat = "identity") + geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+ scale_fill_manual(values = c("#ffd166", # geno "#118ab2", # water "#ef476f", # heat "#06d6a0", # watergeno "#f78c6b", # genoheat "#FFB6C1", # waterheat "#333333"), name = "title of legend (contribution") +# all scale_y_continuous(labels = scales::percent_format())+ scale_color_identity()+ labs(x = "Compartment", y = "The relative contribution for the different variables (%)", ) + theme_minimal()+ theme(panel.grid = element_blank(), legend.title = element_blank(), # for delete title axis.text.x = element_text(angle = 45,hjust = 1, vjust = 1), axis.title.x = element_blank() ) ggsave(here::here("report/physio/plot/contribution.svg"),plot_contrib) ``` ![](plot/contribution.svg) ### Student test ```{r, eval=F} test=read.csv2(here::here("data/physio/global_physio.csv")) %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% filter(recolte==2) %>% drop_na(Hydric_potential) %>% #filter(climat_condition=="WW_OT") %>% t_test(Hydric_potential ~ condition,p.adjust.method = "BH") %>% add_significance() ``` ```{r,eval=F} df_physio_s=read.csv2(here::here("data/physio/global_physio.csv")) %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% filter(recolte==2) %>% drop_na(Hydric_potential) %>% #filter(genotype=="Stocata") %>% #filter(climat_condition%in% c("WW_OT","WS_HS")) %>% dplyr::group_by(condition) %>% dplyr::summarise(Mean=mean(Hydric_potential)) (df_physio_s$Mean[1]-df_physio_s$Mean[2])/df_physio_s$Mean[2] ``` ## For the data related to licor or leaf water pottential ### Data importation ```{r} df_physio_other<-read.csv2(here::here("data/physio/global_physio.csv")) %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", #"paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% # dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", "Hydric_potential", "Trmmol","Tleaf", "Ci","PARabs","CO2S", "PARo", "H2OS", "AHs.Cs","fda", "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow", "Cond", "Tair", "SVTleaf","Press", "CO2R", "RH_S", "Trans", "Tair_K","h2o_i", "VpdL", "PARi", "CndTotal","Ci_Pa", "PhiCO2","TBlk", "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R", "VpdA") %>% drop_na(condition,recolte) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) ``` ### Fast data visualization ```{r} x <- inspect_cat(df_physio_other) x %>% show_plot() ``` ```{r, eval=F} #inspect_cor() returns a tibble containing Pearson’s correlation coefficient, confidence intervals and p-values for pairs of numeric columns. The function combines the functionality of cor() and cor.test() in a more convenient wrapper. #To plot the comparison of the top 20 correlation coefficients for each condition (not show): # px<-inspect_cor(df_physio %>% # # filter(condition==i) %>% # filter(recolte==2)# %>% # #dplyr::select(starts_with("weight")) # ) %>% # slice(1:40) %>% show_plot() # print(px+labs(title = i)) ``` Summarises the prevalence of missing values by each column in a data frame. ```{r} inspect_na(df_physio_other) %>% show_plot() ``` ### Check for missing values ```{r} na_list <- lapply(df_physio_other, function(x) { plantid_with_na <- df_physio_other$plant_num[is.na(x)] return(plantid_with_na) }) ``` *Select only harvest 2* ```{r, eval=T} df_physio_other_h2=df_physio_other %>% filter(recolte==2) %>% filter(plant_num!=1113) %>% filter(!plant_num %in% c(87)) %>% #filter outlier mutate(condition = case_when( condition == "Sto_WW_OT" ~ "Stocata_WW_OT", condition == "Sto_WS_OT" ~ "Stocata_WS_OT", condition == "Sto_WW_HS" ~ "Stocata_WW_HS", condition == "Sto_WS_HS" ~ "Stocata_WS_HS", condition == "Wen_WW_OT" ~ "Wendy_WW_OT", condition == "Wen_WS_OT" ~ "Wendy_WS_OT", condition == "Wen_WW_HS" ~ "Wendy_WW_HS", condition == "Wen_WS_HS" ~ "Wendy_WS_HS", TRUE ~ condition # Keep other values unchanged )) %>% mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) ``` ### Contribution of each variable related to other variable ```{r,eval=F} df_physio_h2_v=df_physio_other_h2 %>% dplyr::select(-c(acronyme_analyse,position, storage_line, unit, analyse_by_plant, recolte, Flow,fda,Press, CO2R,RH_R)) %>% # not interesting data measured by lycor pivot_longer(-c(plant_num,genotype,heat_condition,water_condition,condition,climat_condition),names_to = "variable",values_to = "value") compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8)) for (i in 1:length(levels(as.factor(df_physio_h2_v$variable)))){ variable_l_i=as.character(levels(as.factor(df_physio_h2_v$variable))[i]) cat( "Variable:" ,variable_l_i, "\n") df_x= df_physio_h2_v %>% filter(variable==variable_l_i) %>% drop_na(value) formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = ""))) # Carry out ANOVA using the aov function result_variable <- aov(data = df_x, formula = formula_string) anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`)) row_name<-rownames(anova_result) row_name <- gsub("genotype","Genotype",row_name) row_name <- gsub("water_condition","Water",row_name) row_name <- gsub("heat_condition","Heat",row_name) rownames(anova_result)<-row_name anova_result$variable=as.character(df_x$variable)[1] compile_result=rbind(compile_result,anova_result) } compile_result<-compile_result %>% as.data.frame() %>% rownames_to_column("contribution_variable") %>% mutate(Significance = case_when( `Pr(>F)` <= 0.001 ~ '***', `Pr(>F)` < 0.01 ~ '**', `Pr(>F)` < 0.05 ~ '*', `Pr(>F)` < 0.1 ~ '.', TRUE ~ ' ' )) compile_result=compile_result%>% filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>% mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% mutate(contribution_variable = as.character(contribution_variable)) %>% mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% mutate(text_output=paste0(round(R2*100,0), "% ", Significance)) plot_contrib<-ggplot(compile_result, aes(x = variable, y = R2, fill = contribution_variable)) + geom_bar(stat = "identity") + geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+ scale_fill_manual(values = c("#ffd166", # geno "#118ab2", # water "#ef476f", # heat "#06d6a0", # watergeno "#f78c6b", # genoheat "#FFB6C1", # waterheat "#333333"), name = "title of legend (contribution") +# all scale_y_continuous(labels = scales::percent_format())+ scale_color_identity()+ labs(x = "Compartment", y = "The relative contribution for the different variables (%)", ) + theme_minimal()+ theme(panel.grid = element_blank(), legend.title = element_blank(), # for delete title axis.text.x = element_text(angle = 45,hjust = 1, vjust = 1), axis.title.x = element_blank() )+ coord_flip(); plot_contrib ggsave(here::here("report/physio/plot/contribution_other.svg"),plot_contrib, height = 16,width = 10) ``` ![](plot/contribution_other.svg) ### Summary of some interesting variables in ecophysiology #### Plot ```{r, eval=FALSE} df_evapo_H2 <- read.csv(here::here("data/water/weighing_watering_output/sum_evapotranspi.csv"))[-1] %>% dplyr::rename(plant_num = plant_id) %>% mutate(sum_tot_evapo_plant = sum_total_evapotranspiration/2) %>% filter(recolte == 2) %>% dplyr::select(plant_num, sum_tot_evapo_plant) df_physio_other_H2<-read.csv2(here::here("data/physio/global_physio.csv")) %>% left_join(.,df_evapo_H2, by = "plant_num") %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% # dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", #"paire","weight_stem","weight_leaf","weight_root" ,"leaf_area", "stem_area", "planimeter_leaf", "planimeter_stem", "SLA", "sum_biomass") %>% # # dplyr::select(acronyme_analyse, water_condition, heat_condition, genotype, "plant_num" ,"condition","position" ,"storage_line" ,"unit" ,"analyse_by_plant", "recolte", # "Hydric_potential", "Trmmol","Tleaf", "Ci","PARabs","CO2S", "PARo", "H2OS", "AHs.Cs","fda", "vp_kPa", "Twall_K","Tl.Ta", "CTair", "C2sfc", "SVTair","h20diff","Flow", "Cond", "Tair", "SVTleaf","Press", "CO2R", "RH_S", "Trans", "Tair_K","h2o_i", "VpdL", "PARi", "CndTotal","Ci_Pa", "PhiCO2","TBlk", "Ci.Ca", "CTleaf","CndCO2","RHsfc", "R.W.m2.","H2OR", "Photo", "RH_R", "VpdA") %>% drop_na(condition,recolte) %>% filter(recolte==2) Fig_evapo=stat_analyse( data=df_physio_other_H2 %>% drop_na(climat_condition,sum_tot_evapo_plant), column_value = "sum_tot_evapo_plant", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = "Total evapotranspiration (mL)", control_conditions = c("WW_OT"), strip_normale = F, hex_pallet = climate_pallet ) Fig_tleaf=stat_analyse( data=df_physio_other_H2 %>% drop_na(climat_condition,Tleaf), column_value = "Tleaf", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = "Leaf temperature (°C)", control_conditions = c("WW_OT"), strip_normale = F, hex_pallet = climate_pallet ) Fig_photo=stat_analyse( data=df_physio_other_H2 %>% drop_na(climat_condition,Photo), column_value = "Photo", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = expression("Photosynthesis" ~ mu * mol ~ CO[2] ~ m^-2 ~ s^-1), control_conditions = c("WW_OT"), strip_normale = F, hex_pallet = climate_pallet ) Fig_SLA=stat_analyse( data=df_physio_other_H2 %>% drop_na(climat_condition,SLA), column_value = "SLA", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = "Specific leaf area (m²/kg)", control_conditions = c("WW_OT"), strip_normale = F, hex_pallet = climate_pallet ) Fig_LWP=stat_analyse( data=df_physio_other_H2 %>% drop_na(climat_condition,Hydric_potential), column_value = "Hydric_potential", category_variables = c("climat_condition"), grp_var = "genotype", show_plot = T, outlier_show = F, label_outlier = "plant_num", biologist_stats = T, Ylab_i = "Leaf water potential (Mpa)", control_conditions = c("WW_OT"), strip_normale = F, hex_pallet = climate_pallet ) px=ggarrange( Fig_evapo[["plot"]]+labs(color="Treatment",fill="Treatment"), Fig_tleaf[["plot"]]+labs(color="Treatment",fill="Treatment"), Fig_photo[["plot"]]+labs(color="Treatment",fill="Treatment"), #Fig_SLA[["plot"]]+labs(color="Treatment",fill="Treatment"), Fig_LWP[["plot"]]+labs(color="Treatment",fill="Treatment"), ncol=4, nrow=1,common.legend = TRUE, legend="right", #labels = c("A","B","C", "D", "F"), align='h') px #export to delet png(here::here("report/physio/plot/evapo_tleaf_lwp.png"), width = 30, height = 8, units = 'cm', res = 900) grid.arrange (px) # Make plot dev.off() # test=df_physio_other_H2 %>%drop_na(SLA) %>% dplyr::select(plant_num,condition,leaf_scan,planimeter_leaf, leaf_area,weight_leaf,SLA) %>% mutate(scan=ifelse(is.na(planimeter_leaf),"scan","planimeter")) %>% mutate(cond_type=paste0(condition,"_",scan)) %>% # ggplot(aes(x=cond_type,y=leaf_area))+geom_boxplot() ``` ![](plot/evapo_tleaf_photo_sla_lwp.png) #### Contribution ```{r, eval=F} df_evapo_H2 <- read.csv(here::here("data/water/weighing_watering_output/sum_evapotranspi.csv"))[-1] %>% dplyr::rename(plant_num = plant_id) %>% mutate(sum_tot_evapo_plant = sum_total_evapotranspiration/2) %>% filter(recolte == 2) %>% dplyr::select(plant_num, sum_tot_evapo_plant) df_physio_other_H2_v<-read.csv2(here::here("data/physio/global_physio.csv")) %>% left_join(.,df_evapo_H2, by = "plant_num") %>% mutate(across(c("genotype","water_condition","heat_condition"), as.factor)) %>% mutate(across(c("storage_line","position"), as.integer)) %>% mutate(water_condition=relevel(water_condition, "WW")) %>% mutate(heat_condition=relevel(heat_condition, "OT")) %>% mutate(climat_condition=paste0(water_condition,"_",heat_condition)) %>% mutate(climat_condition=factor(climat_condition,levels=c("WW_OT","WS_OT","WW_HS","WS_HS"))) %>% mutate(condition=factor(condition,levels=c("Sto_WW_OT","Sto_WS_OT","Sto_WW_HS","Sto_WS_HS","Wen_WW_OT","Wen_WS_OT","Wen_WW_HS","Wen_WS_HS"))) %>% drop_na(condition,recolte) %>% filter(recolte==2) %>% dplyr::select(water_condition, heat_condition, genotype, plant_num, condition, climat_condition, sum_tot_evapo_plant, Tleaf,Photo,#SLA, Hydric_potential) %>% pivot_longer(-c(plant_num,genotype,heat_condition,water_condition,condition,climat_condition),names_to = "variable",values_to = "value") %>% mutate(variable = case_when( variable == "sum_tot_evapo_plant" ~ "Total evapotranspiration", variable == "Tleaf" ~ "Leaf temperature", variable == "Photo" ~ "Photosynthesis", variable == "SLA" ~ "Specific leaf area", variable == "Hydric_potential" ~ "Leaf water potential", TRUE ~ variable ) ) compile_result=as.data.frame(matrix(data=NA,nrow = 0, ncol = 8)) for (i in 1:length(levels(as.factor(df_physio_other_H2_v$variable)))){ variable_l_i=as.character(levels(as.factor(df_physio_other_H2_v$variable))[i]) cat( "Variable:" ,variable_l_i, "\n") df_x= df_physio_other_H2_v %>% filter(variable==variable_l_i) %>% drop_na(value) formula_string <- as.formula(paste("value", "~", paste("genotype","*","water_condition","*","heat_condition", sep = ""))) # Carry out ANOVA using the aov function result_variable <- aov(data = df_x, formula = formula_string) anova_result=summary(result_variable)[[1]] %>% mutate(R2 = `Sum Sq` / sum(`Sum Sq`)) row_name<-rownames(anova_result) row_name <- gsub("genotype","Genotype",row_name) row_name <- gsub("water_condition","Water",row_name) row_name <- gsub("heat_condition","Heat",row_name) rownames(anova_result)<-row_name anova_result$variable=as.character(df_x$variable)[1] compile_result=rbind(compile_result,anova_result) } compile_result<-compile_result %>% as.data.frame() %>% rownames_to_column("contribution_variable") %>% mutate(Significance = case_when( `Pr(>F)` <= 0.001 ~ '***', `Pr(>F)` < 0.01 ~ '**', `Pr(>F)` < 0.05 ~ '*', `Pr(>F)` < 0.1 ~ '.', TRUE ~ ' ' )) library(forcats) compile_result=compile_result%>% filter(str_detect(contribution_variable, "Total", negate = TRUE)) %>% filter(str_detect(contribution_variable, "Residual", negate = TRUE)) %>% mutate(contribution_variable = str_replace_all(contribution_variable, " ", "")) %>% mutate_at(vars(contribution_variable), ~str_replace_all(., "\\$", ""))%>% mutate_at(vars(contribution_variable), ~str_replace_all(., "[0-9]", "")) %>% mutate(contribution_variable = as.character(contribution_variable)) %>% mutate(fontcolor = ifelse(contribution_variable %in% c("Water", "Genotype:Water:Heat"), "#ffffff", "#000000")) %>% mutate(contribution_variable=fct_relevel(contribution_variable,c("Genotype","Water","Heat", "Genotype:Water","Genotype:Heat","Water:Heat","Genotype:Water:Heat"))) %>% mutate(text_output=paste0(round(R2*100,0), "% ", Significance)) %>% mutate(variable=factor(variable,levels=c("Total evapotranspiration","Leaf temperature","Photosynthesis","Leaf water potential"))) %>% arrange(desc(variable)) plot_contrib<-ggplot(compile_result, aes(x = fct_inorder(variable), y = R2, fill = contribution_variable)) + geom_bar(stat = "identity") + geom_text(aes(label = ifelse(R2 > 0.02, text_output, "")), color = compile_result$fontcolor, position = position_stack(vjust = 0.5), size = 2.5)+ scale_fill_manual(values = c("#ffd166", # geno "#118ab2", # water "#ef476f", # heat "#06d6a0", # watergeno "#f78c6b", # genoheat "#FFB6C1", # wate rheat "#333333"), name = "title of legend (contribution") +# all scale_y_continuous(labels = scales::percent_format())+ scale_color_identity()+ labs(x = "Variable", y = "The relative contribution for the different variables (%)", ) + theme_classic()+ theme(panel.grid = element_blank(), legend.title = element_blank(), # for delete title axis.text.x = element_text(angle = 0,hjust = 1, vjust = 1), axis.title.x = element_blank(), axis.line = element_line(size = 0.3) )+ coord_flip(); plot_contrib ggsave(here::here("report/physio/plot/contrib_physio_interesting_var.svg"),plot_contrib, height = 2,width = 11) ggsave(here::here("report/physio/plot/contrib_physio_interesting_var.png"),plot_contrib, height = 2,width = 11, dpi = 600) ```