5  Pre-processing of ecophysiological data

Code
# pkg
library(readxl)
library(tidyverse)
library(dplyr)
 
#function
# source(here::here("src/function/stat_function/stat_analysis_main.R")) # for make plot 
# source("src/function/graph_function.R")

5.1 Data importation

Code
# Raw ecophysiological measurements and per-plant metadata (Excel workbooks).
data_physio <- read_excel(
  here::here("data/physio/raw_data_physio/dataset_physio_soybean_xp1.xlsx")
)
plant_info <- read_excel(here::here("data/plant_information.xlsx"))

# Full outer join on plant_num: rows present in only one table are kept.
global_physio <- merge(data_physio, plant_info, by = "plant_num", all = TRUE)

# Scan results for leaves and stems (result_nb_pixel_* files) ----
leaf_scan <- read.csv(
  here::here("data/physio/raw_data_physio/result_nb_pixel_leaf_scan.csv")
)
stem_scan <- read.csv(
  here::here("data/physio/raw_data_physio/result_nb_pixel_stem_scan.csv")
)

5.2 Transform labels into numbers and rename columns

Code
# Turn a raw scan table into (plant_num, <area_col>, X).
#
# scan       data frame read from a scan result CSV; must have a `Label`
#            column. NOTE(review): the renaming below assumes exactly three
#            columns in the order Label, pixel count, X - confirm against
#            the CSV files.
# area_col   name given to the second column ("leaf_scan" or "stem_scan").
# pixel_side side length of one pixel; the count is multiplied by it twice
#            to obtain an area. NOTE(review): unit presumably cm, since SLA
#            later treats areas as cm2 - confirm with the scanner settings.
#
# Returns the data frame with `Label` replaced by the integer plant number
# and the second column converted from a pixel count to an area.
scan_label_to_area <- function(scan, area_col, pixel_side = 0.0063796) {
  labels <- as.character(scan$Label)
  tokens <- regmatches(labels, gregexpr("[0-9\\.]+", labels))

  # Each label must contain exactly one numeric token; otherwise plant
  # numbers could not be matched one-to-one with the rows.
  bad <- which(lengths(tokens) != 1L)
  if (length(bad) > 0) {
    stop(
      "Cannot extract a single plant number from label(s): ",
      paste(labels[bad], collapse = ", "),
      call. = FALSE
    )
  }

  scan$Label <- as.integer(as.numeric(unlist(tokens)))
  colnames(scan) <- c("plant_num", area_col, "X")
  # Same multiplication order as before (count * side * side) so results
  # are numerically identical.
  scan[[area_col]] <- scan[[area_col]] * pixel_side * pixel_side
  scan
}

leaf_scan <- scan_label_to_area(leaf_scan, "leaf_scan")
stem_scan <- scan_label_to_area(stem_scan, "stem_scan")

# Full outer join of the two scans, keeping only the key and the two areas
# (the duplicated X columns are dropped).
df_scan <- merge(stem_scan, leaf_scan, by = "plant_num", all = TRUE)
df_scan <- df_scan %>%
  dplyr::select("plant_num", "leaf_scan", "stem_scan")

# Attach the scan areas to the physiological table (full outer join).
global_physio <- merge(global_physio, df_scan, by = "plant_num", all = TRUE)

5.3 Recalculation of some variables

Code
# Combine planimeter and scan measurements into a single area per organ:
# use the planimeter value when available, otherwise fall back to the scan.
# ifelse() is vectorised, so no rowwise() is needed (and the table is not
# left row-grouped for later steps).
global_physio <- global_physio %>%
  mutate(
    leaf_area = ifelse(is.na(planimeter_leaf), leaf_scan, planimeter_leaf),
    # Fallback for stems is the stem scan (previously leaf_scan by mistake).
    stem_area = ifelse(is.na(planimeter_stem), stem_scan, planimeter_stem)
  )

# Replace 0 by NA (which() skips rows where the area is already NA)
global_physio$leaf_area[which(global_physio$leaf_area == 0)] <- NA
global_physio$stem_area[which(global_physio$stem_area == 0)] <- NA

# Calculate SLA (specific leaf area)
# The variable calculated is leaf area / leaf mass in m²/kg, i.e. the ratio
# of leaf area (cm² converted to m²) to leaf dry biomass (g converted to kg)
global_physio$SLA <- (global_physio$leaf_area * 0.0001) /
  (global_physio$weight_leaf * 0.001)

# Calculate the sum of the biomass (leaf + stem + root)
global_physio$sum_biomass <- global_physio$weight_leaf +
  global_physio$weight_stem +
  global_physio$weight_root

# Calculate the shoot_root_ratio: (leaf + stem) / root
global_physio$shoot_root_ratio <-
  (global_physio$weight_leaf + global_physio$weight_stem) /
  global_physio$weight_root

# Convert Leaf Water Potential
# NOTE(review): the factor -0.1 presumably converts a positive reading in bar
# to a negative value in MPa - confirm against the measurement protocol.
global_physio$Hydric_potential <- global_physio$Hydric_potential * -0.1

5.4 Select the interesting data

Code
# Keep the measured and derived physiological variables, plus plant_num
# which serves as the key for the merges that follow.
global_physio_select <- dplyr::select(
  global_physio,
  Hydric_potential,
  weight_stem, weight_leaf, weight_root, weight_cotyledon,
  leaf_scan, leaf_area, stem_scan, stem_area,
  planimeter_leaf, planimeter_stem,
  SLA, sum_biomass, plant_num, shoot_root_ratio
)

5.5 Merge physio, licor and plant_info

Code
# Li-Cor results: only columns 2 to 23 of the CSV are kept (by position).
licor <- read.csv(
  here::here("data/physio/raw_data_physio/result_licor.csv")
)[2:23]

# Long -> wide: one column per level of `parametre`, filled from `value`.
licor_h <- pivot_wider(
  licor,
  names_from = "parametre",
  values_from = "value"
)

# Keep plant_num plus the wide columns found at positions 21 to 64.
# NOTE(review): these positions depend on the CSV layout and on the set of
# `parametre` levels - confirm they still match if the input file changes.
licor_h_select <- dplyr::select(
  licor_h,
  plant_num, colnames(licor_h)[21:64]
)

# Coerce the key to integer before merging on it.
plant_info$plant_num <- as.integer(plant_info$plant_num)

# Full outer joins: physio + Li-Cor first, then the plant metadata.
global_physio <- merge(
  global_physio_select, licor_h_select,
  by = "plant_num", all = TRUE
)
global_physio <- merge(plant_info, global_physio, by = "plant_num", all = TRUE)
Delete extreme outliers

I delete these plants because of a problem in their root biomass measurement or because they are extreme outliers in multiple analyses.

Code
# Drop the two plants identified as extreme outliers (1113 and 87).
# Both conditions are combined with AND inside a single filter(); as with the
# `!=` test alone, rows whose plant_num is NA are dropped too.
global_physio_clean <- global_physio %>%
  filter(
    plant_num != 1113,
    !plant_num %in% c(87)
  )

5.6 Export

Code
# Export the cleaned table with write.csv2(), without row names.
write.csv2(
  global_physio_clean,
  here::here("data/physio/global_physio.csv"),
  row.names = FALSE
)