lanceur_reference.R 43.24 KiB
# Lanceur version 0.5
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------- TABLE OF CONTENT ---------------------------------------------------------------------------------------------
# 1. Installing MCQR
# 2. Analyzing spectral counting (SC) data
# 2.1. Data and metadata loading
# 2.2. Checking data quality
# 2.3. Filtering proteins
# 2.4. Overview of protein abundances
# 2.5. Analyzing protein abundance variations
# 2.6. Exporting results in '/path/to/my_working_directory'
# 3. Analyzing quantification data obtained from extracted ion chromatogram (XIC)
# 3.1 Data and metadata loading
# 3.2. Checking data quality
# 3.2.1. Checking chromatography
# 3.2.2. Checking injections
# 3.3. Analyzing intensity data (quantitative variations)
# 3.3.1. Filtering dubious data
# 3.3.2. Normalizing peptide-mz intensities to take into account possible global quantitative variations between LC-MS runs
# 3.3.3. Filtering for shared, unreproducible or uncorrelated peptides-mz
# 3.3.4. Imputing missing peptide-mz intensities
# 3.3.5. Computing protein abundances
# 3.3.6. Imputing missing protein abundances
# 3.3.7. Overview of the protein abundance data
# 3.3.8. Analyzing protein abundance variations
# 3.3.9. Exporting results in '/path/to/my_working_directory'
# 3.4. Analyzing peak counting data (semi-quantitative variations)
# 3.4.1. Filtering data to remove peptides-mz with dubious chromatographic data
# 3.4.2. Counting 'peaks'
# 3.4.3. Analyzing protein abundance variations
# 3.4.4. Exporting results in '/path/to/my_working_directory'
# 4. Exporting a synthesis
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#****************************************************** 1. Installing MCQR
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
# See the file "lisezmoi.txt"
# Loading MCQR package
library("MCQR")
# Checking version
packageVersion("MCQR")
# Changing working directory (replace '/path/to/my_working_directory' by your computer tree)
setwd ("/path/to/my_working_directory")
## the data used for examples come from "janv2017_shotgun_labelfree_tests_v3"
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#****************************************************** 2. Analyzing spectral counting (SC) data
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#****************************************************************************************************************
# 2.1. Data and metadata loading
#****************************************************************************************************************
##!!!!!!!!!!!!!!!!!!!!! The inputfile containing spectral count data must be in .txt format from Xtandempipeline or in .tsv format from xtpcpp.
## To this end, export the SpectralCounting mcq file from xtpcpp ('myfilename.tsv')
## or export "Peptide, protein and compar results as spreadsheet" in "Tabulated text" from XtandemPipeline. XtandemPipeline will automatically produce a file named 'myresults_compar_spectra.txt'.!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Loading spectral counting data
SC <- mcq.read.spectral.counting("results_ovules_compar_spectra.txt")
# Exporting a template for metadata (either in .txt or .tsv which is equivalent to csv except that the text separator is a tab). The file is saved in '/path/to/my_working_directory'.
mcq.write.metadata.template(SC, file="metadata.tsv")
#!!!!!!!!!!!!!!!!!!!!! Open the template in Libreoffice or Excel and fill in the columns with all the information that you have about your injections (e.g. information relative to the experimental design such as the treatment, the replicate, the genotype, etc.). Save the file in an .ods format.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Importing the filled in metadata file
META <- mcq.read.metadata("metadata_SC.ods")
# Attaching metadata to spectral counting data
SC <- mcq.merge.metadata(SC, META)
# Overview of the content of the data
summary (SC)
# Retrieve metadata
metadata <- mcq.get.metadata(SC)
metadata
#****************************************************************************************************************
# 2.2. Checking data quality
#****************************************************************************************************************
############# Checking the total number of spectra for each sample
# Display of the graph on the screen
mcq.plot.counts(SC)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.counts(SC, file="SC_counts_per_sample.pdf")
############# Checking the experiment by a principal component analysis (PCA). Is there any outlier ?
# PCA using all the injections as individuals
SC_PCA <- mcq.compute.pca(SC)
## Known Bug : you can't specifiy multpile column name labels if you use "msrun" column label
# Display of the PCA on the screen. Possibility to modify the 'factorToColor', 'labels' and 'tagType' arguments to change the colors and the labels of the individuals represented on the PCA
mcq.plot.pca(SC_PCA, factorToColor=c("jour", "zone"), labels = "msrun", tagType="both")
mcq.plot.pca(SC_PCA, factorToColor=c("jour", "zone"), labels = c("jour", "zone", "rep"), tagType="label", labelSize = 3)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pca(SC_PCA, factorToColor=c("jour", "zone"), labels = c("jour", "zone", "rep"), tagType="label", labelSize = 3, file="SC_PCA_graphs.pdf")
# Exporting the PCA results in '/path/to/my_working_directory'
mcq.write.compar(SC_PCA, file="SC_PCA.tsv")
############# If necessary, removal of dubious injections
# E.g. in the factor 'msrun', removal of the level 'msrunb11'
# Take a look at the help (help(mcq.drop)) to see the factors you can remove
# You can also select a few injections for the analyze with the mcq.select function. It's the reverse function of mcq.drop.
SC_test <- mcq.drop(SC, factor="msrun", levels="mcqsca0")
# Display of a summary of the 'protPep' object after removal of dubious injections
summary(SC_test)
#****************************************************************************************************************
# 2.3. Filtering proteins
#****************************************************************************************************************
############# Removing proteins showing low numbers of spectra (< 5) in all the injections (the protein abundances are not reliable when the number of spectra is less than 5)
############# For an example, if cutoff = 5, all the proteins quantified with only 0, 1, 2, 3 or 4 spectra in each of the injections will be removed.
SC <- mcq.drop.low.counts(SC, cutoff=5)
############# Removing proteins showing little variation in their number of spectra. The selection criteria is the ratio between the minimal and the maximal mean abundance values computed for a factor or a combination of factors of interest.
SC <- mcq.drop.low.fold.changes(SC, cutoff=1.5, flist=c("jour", "zone"))
#****************************************************************************************************************
# 2.4. Overview of protein abundances
#****************************************************************************************************************
############# Plotting a heatmap
# Display of the graph on the screen. Possibility to change the color, the method of aggregation ('method' argument) and the method to compute the distances ('distfun' argument). See help for details
mcq.plot.heatmap(SC, flist=c("jour", "zone"), factorToColor=c("jour", "zone"))
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.heatmap(SC, flist=c("jour", "zone"), factorToColor=c("jour", "zone"), file="SC_heatmap.pdf")
# Exporting the graph in a .pdf file in '/path/to/my_working_directory' and the protein list to write
heatmap.prot.list <- mcq.plot.heatmap(SC, flist=c("jour", "zone"), factorToColor=c("jour", "zone"), file="SC_heatmap.pdf", getProtlist =TRUE)
# Exporting the protein list in a .txt or .csv file in '/path/to/my_working_directory'
mcq.write.dataframe(heatmap.prot.list, file="SC_heatmap_proteins_list.tsv")
############# Clustering the proteins
# Cluster for the factor(s) of interest
SC_cluster <- mcq.compute.cluster(SC, flist=c("jour", "zone"), nbclust=8)
# Display of the graph on the screen
mcq.plot.cluster(SC_cluster)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.cluster(SC_cluster, file="SC_cluster.pdf")
# Exporting the results in a .txt or .csv file in '/path/to/my_working_directory'
mcq.write.compar(SC_cluster, file="SC_cluster_results.tsv")
#****************************************************************************************************************
# 2.5. Analyzing protein abundance variations
#****************************************************************************************************************
############# Differential analysis
# ANOVA for the factors of interest
# Run anova without interaction term, if you want to perform tukey test with counting data
SC_ANOVA <- mcq.compute.anova(SC, flist=c("jour", "zone"),inter=FALSE)
# Display the distribution of the p-values for a factor of interest on the screen
mcq.plot.pvalues(SC_ANOVA)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pvalues(SC_ANOVA, file="SC_pvalues_distribution.pdf")
############# Analyzing the proteins showing significant variations
# Retrieving the list of proteins showing significant abundance variations for a given factor
SC_PROTEINS_selected_jour <- mcq.select.pvalues(SC_ANOVA, padjust=TRUE, alpha=0.05, flist="jour")
SC_PROTEINS_selected_zone <- mcq.select.pvalues(SC_ANOVA, padjust=TRUE, alpha=0.05, flist="zone")
# Retrieving spectral counts for the selected proteins
SC_selected_jour <- mcq.select.from.protlist(SC, SC_PROTEINS_selected_jour)
# Tukey test
# WARNING : Verify your packageVersion of agricolae (with the R function packageVersion("agricolae")), it must be the 1.2.9 version
#(the URL provide in the PAPPSO site it's an unstable version, waiting for the stable one)
SC_selected_tukey <- mcq.compute.tukey(SC_ANOVA, flist="jour", protlist=SC_PROTEINS_selected_jour)
# Display of the graph on the screen
mcq.plot.tukey(SC_selected_tukey, qprot=SC)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.tukey(SC_selected_tukey, qprot=SC, file="SC_Tukey.pdf")
# Exporting the values of Tukey test in a .tsv file
mcq.write.dataframe(SC_selected_tukey, file="SC_Tukey.tsv")
# Handling protein lists
SC_PROTEINS_selected_jour <- mcq.select.pvalues(SC_ANOVA, padjust=TRUE, alpha=0.05, flist="jour")
SC_PROTEINS_selected_zone <- mcq.select.pvalues(SC_ANOVA, padjust=TRUE, alpha=0.05, flist="zone")
diff_list <- setdiff(SC_PROTEINS_selected_jour,SC_PROTEINS_selected_zone)
union_list <- union(SC_PROTEINS_selected_jour,SC_PROTEINS_selected_zone)
intersect_list <- intersect(SC_PROTEINS_selected_jour,SC_PROTEINS_selected_zone)
#selection of protein in union_list
SC_selected_union <- mcq.select.from.protlist(SC, union_list)
############# Plot of individual protein abundances
# Display of the graph on the screen
mcq.plot.protein.abundance(SC_selected_union, factorToColor=c("jour","zone"), flist=c("jour","zone"))
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.protein.abundance(SC_selected_union, factorToColor=c("jour","zone"), flist=c("jour","zone"), file="SC_plot_protein_abundances.pdf")
############# Analyzing protein abundance ratios
# Computing ratios between two levels of a factor of interest
SC_RATIO <- mcq.compute.fold.change(SC_selected_union, flist="jour", numerator="ss5", denominator="ss0")
# Display of the graph on the screen
mcq.plot.fold.change(SC_RATIO)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.fold.change(SC_RATIO, file="SC_RATIO_plot.pdf")
# Exporting the values of ratio in a .tsv file
mcq.write.dataframe(SC_RATIO, file="SC_RATIO_values.tsv")
#****************************************************************************************************************
# 2.6. Exporting results in '/path/to/my_working_directory'
#****************************************************************************************************************
# Exporting spectral counts in a 'compare' format (one sample = one column)
mcq.write.compar(SC, file="SC.tsv", columnID="msrun")
# Exporting ANOVA results (only p-values) in a 'compare' format (one sample = one column)
mcq.write.dataframe(SC_ANOVA, file="SC_ANOVA.tsv")
# Exporting the spectral counts together with ANOVA results
mcq.write.merge(SC, SC_ANOVA, file="SC_ANOVA_merged.tsv", columnID="msrunfile")
# Exporting spectral counts together with ANOVA results for the proteins showing significant variations
SC_ANOVA_selected <- mcq.select.from.protlist(SC_ANOVA, protlist=union_list)
mcq.write.merge(SC_selected_union, SC_ANOVA_selected, file="SC_ANOVA_selected_merged.tsv", columnID="msrunfile")
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#****************************************************** 3. Analyzing quantification data obtained from extracted ion chromatogram (XIC)
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#********************************************************
#--------------------------------------------------------
# 3.1 Data and metadata loading
#--------------------------------------------------------
#********************************************************
# Loading MassChroq data. NB: peptide intensities measured as peak areas are automatically log10-transformed.
XICRAW <- mcq.read.masschroq(protFile="G1_proteins.tsv", pepFile="peptides_q1_G1.tsv")
# Overview of the content of the 'XICRAW' object
summary (XICRAW)
# Exporting a template for the metadata (either in .txt or .tsv which is equivalent to csv except that the text separator is a tab). The file is save in '/path/to/my_working_directory'.
mcq.write.metadata.template(XICRAW, file="metadata.tsv")
#!!!!!!!!!!!!!!!!!!!!! Open the template in Libreoffice or Excel and fill in the columns with all the information that you have about your injections (e.g. information relative to the experimental design such as the treatment, the replicate, the genotype, etc.). Save the file in an .ods format.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Importing metadata
META <- mcq.read.metadata("metadata_XICRAW.ods")
# Attaching metadata to quantification data
XICRAW <- mcq.merge.metadata(XICRAW, metadata=META)
#********************************************************
#--------------------------------------------------------
# 3.2. Checking data quality
#--------------------------------------------------------
#********************************************************
#****************************************************************************************************************
# 3.2.1. Checking chromatography
#****************************************************************************************************************
############# Checking the distribution of the width of chromatographic peaks. Are there many very large peaks?
# Display of the graph on the screen
mcq.plot.peak.width(XICRAW)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.peak.width(XICRAW, file="XICRAW_distribution_peak_width.pdf")
############# Checking the variability of the peptide-mz retention time (RT) by plotting the distribution of the standard deviation of RT. Are there many peptides-mz showing huge variations of their RT?
# Display of graph on the screen. If necessary, it is possible to limit the range of the x axis (e.g. here, 40s).
mcq.plot.rt.variability(XICRAW, limit=80,width=1)
# Export of the graph in a .pdf file in '/path/to/my_working_directory
mcq.plot.rt.variability(XICRAW, limit=80, file="XICRAW_distribution_rt_variability.pdf")
#****************************************************************************************************************
# 3.2.2. Checking injections
#****************************************************************************************************************
############# Checking the number of chromatographic peaks quantified in each sample. Are there injections with much fewer peaks?
# Display of the graph on the screen
mcq.plot.counts(XICRAW)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.counts(XICRAW, file="XICRAW_counts_per_injection.pdf")
############# Checking the distribution of the peptide intensities in the injections. Are there injections showing odd distribution?
# Display of the graph on the screen
mcq.plot.intensity.violin(XICRAW, factorToColor="rep")
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.violin(XICRAW, factorToColor="rep", file="XICRAW_violin_intensity.pdf")
############# Checking the median intensity along the chromatography. Are there injections showing abnormal intensity profiles?
# Display of the graph on the screen
mcq.plot.intensity.profile(XICRAW, factorToColor="rep", RTinterval=250)
# Display an interactive graph on the navigator
mcq.plot.intensity.profile(XICRAW, factorToColor="rep", RTinterval=250, interactiveView=TRUE)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.profile(XICRAW, factorToColor="rep", RTinterval=250, file="XICRAW_intensity_profiles.pdf")
############# Checking the experiment by a principal component analysis (PCA). Is there any outlier ?
# PCA using all the injections as individuals and peptides-mz as variables
XICRAW_PCA <- mcq.compute.pca(XICRAW)
# Display of the PCA graphs on the screen. Possibility to modify the 'factorToColor' and 'labels' arguments to change the colors and the labels of the individuals represented on the PCA.
mcq.plot.pca(XICRAW_PCA, factorToColor=c("rep"), labels = c("jour", "zone","rep"), tagType="both")
# Exporting the graphs in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pca(XICRAW_PCA, factorToColor=c("rep"), labels = c("jour", "zone","rep"), tagType="both", labelSize = 0.8, file="XICRAW_PCA_graphs.pdf")
# Exporting the PCA results in '/path/to/my_working_directory'
mcq.write.compar(XICRAW_PCA, file="XICRAW_PCA_results.tsv")
############# Checking the distribution of the peptide-mz log-intensities in each sample. Is there injections where intensities are not normally distributed?
# Display of the graph on the screen
mcq.plot.intensity.distribution(XICRAW, width=0.1)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.distribution(XICRAW, width=0.1, file="XICRAW_intensity_distribution_per_injection.pdf")
############# Checking the global correlation between the peptide-mz intensities of one sample and the peptide-mz intensities of a sample chosen as reference. Are the correlation globally correct?
# Display of the graph on the screen
mcq.plot.intensity.correlation(XICRAW, ref="sampa0")
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.correlation(XICRAW, ref="sampa0", file="XICRAW_intensity_correlation_between_injections.pdf")
#********************************************************
#--------------------------------------------------------
# 3.3. Analyzing intensity data (quantitative variations)
#--------------------------------------------------------
#********************************************************
#****************************************************************************************************************
# 3.3.1. Filtering dubious data
#****************************************************************************************************************
############# If necessary, removing dubious injections
# E.g. in the factor 'msrun', removal of the level 'msrunb11'
# Take a look at the help (help(mcq.drop)) to see the factors you can remove
# You can also select a few injections for the analyze with the mcq.select function. It's the reverse function of mcq.drop.
XIC_test <- mcq.drop(XICRAW, factor="msrun", levels=c("sampb11"))
# THINK to replace XICRAW by XIC in the next step
############# If necessary, removing dubious chromatographic data
# Removing peptides-mz showing too much variations of their retention time. These peptides-mz may occur from mis-identifications. Use the plot produced by the mcq.plot.rt.variability function to decide on the cut-off of standard deviation to use (e.g. here, 20 seconds).
XIC <- mcq.drop.variable.rt(XICRAW, cutoff=20)
# Removing peptides-mz associated to very large chromatographic peaks. Use the plot produced by the mcq.plot.peak.width function to decide on the cut-off to use (e.g. here, 200 seconds). Don't be too stringent with this filter because the peak width is proportional to the peak height. A large peak may therefore simply correspond to a peptide-mz that is particularly intense in a given sample.
XIC <- mcq.drop.wide.peaks(XIC, cutoff=200)
# Removing a range of retention time in the middle of the chromatography
XIC_test <- mcq.drop.rt.range(XIC, rtmin=700, rtmax=800)
# Removing a range of retention time at the beginning and/or at the end of the chromatography (in this case, the undesired RT range is dropped by selecting its desired counterpart)
XIC_test <- mcq.select.rt.range(XIC, rtmin=700, rtmax=2500)
############# Display of a summary of the 'XIC' object after removal of dubious peptides
summary(XIC)
#****************************************************************************************************************
# 3.3.2. Normalizing peptide-mz intensities to take into account possible global quantitative variations between LC-MS runs
#****************************************************************************************************************
############# Normalizing peptide-mz intensities
# Normalization by a median-based method ("median" or "median.RT", based on a reference sample) or by a "percent" method. See help for details
XIC <- mcq.compute.normalization(XIC, ref="sampa1", method="median.RT")
############# Controling the effect of normalization on intensities distribution
# Display of the graph of intensity distributions on the screen
mcq.plot.intensity.violin(XIC, factorToColor="rep")
# Exporting the graph of intensity distributions in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.violin(XIC, factorToColor="rep", file="XIC_violin_intensity_after_normalization.pdf")
############# Controling the effect of normalization on intensity profiles
# Display of the graph of intensity profile along the chromatography on the screen
mcq.plot.intensity.profile(XIC, factorToColor="rep", RTinterval=250)
# Exporting the graph of intensity profile along the chromatography in a .pdf file in '/path/to/my_working_directory'
mcq.plot.intensity.profile(XIC, factorToColor="rep", RTinterval=250, file="XIC_intensity_profiles_after_normalization.pdf")
# PCA using all the injections as individuals
XIC_PCA <- mcq.compute.pca(XIC)
# Display of the PCA on the screen. Possibility to modify the 'factorToColor' and 'labels' arguments to change the colors and the labels of the individuals represented on the PCA.
mcq.plot.pca(XIC_PCA, factorToColor=c("jour", "zone"), labels = c("jour", "zone","rep"))
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pca(XIC_PCA, factorToColor=c("jour", "zone"), labels = c("jour", "zone","rep"), tagType="both", labelSize=2, file="XIC_PCA_graphs_after_normalization.pdf")
#****************************************************************************************************************
# 3.3.3. Filtering for shared, unreproducible or uncorrelated peptides-mz
#****************************************************************************************************************
############# Checking the distribution of the number of proteins to which a peptide-mz belongs. Remark : a peptide that belongs to only one protein in the dataset is not proteotypic to this protein. It is in fact specific of a sub-group of XTandemPipeline
# Display of the graph on the screen
mcq.plot.peptide.sharing(XIC)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.peptide.sharing(XIC, file = "XIC_shared_peptides.pdf")
############# Removing shared peptides
XIC <- mcq.drop.shared.peptides(XIC)
############# Checking the reproducibility of the peptides-mz
# Display of the graph on the screen
mcq.plot.peptide.reproducibility(XIC)
# Exporting the graphe in a .pdf file in '/path/to/my_working_directory'
mcq.plot.peptide.reproducibility(XIC, file="XIC_peptide_reproducibility.pdf")
############# Removing peptides-mz showing too many missing data (here 5%)
XIC <- mcq.drop.unreproducible.peptides(XIC, method="percent", percentNA=0.1)
# Display of a summary of the 'XIC' object after removal of peptides that are absent in more than a given proportion of the injections
summary(XIC)
############# Checking of the correlation of the peptides-mz belonging to a same protein
# Display of the graph on the screen
PROT10 <- mcq.plot.peptide.intensity(XIC, flist=c("jour", "zone"), rCutoff = 0.5, nprot=10, getProtlist=TRUE)
# Exporting the graphe in a .pdf file in '/path/to/my_working_directory' with the same parameters and the same proteins
mcq.plot.peptide.intensity(XIC, flist=c("jour", "zone"), rCutoff = 0.5, protlist=PROT10, file="XIC_peptide_correlation_pval1.pdf")
############# Removing the peptides-mz whose intensity profile deviates from the average profile of the peptides-mz from the same protein. This filter must be applied only if the number of levels within one of the factors of interest (factors like replicates not included) is superior or equal to 5 because it is based on the correlation between peptides-mz belonging to the same protein.
XIC <- mcq.drop.uncorrelated.peptides(XIC, flist=c("jour", "zone"), rCutoff = 0.5)
############# Removing proteins quantified by a small number of peptides-mz. When the minimal number of peptides-mz required is 2, this filter is automatically included in the mcq.drop.uncorrelated.peptides function. The function mcq.drop.proteins.with.few.peptides is therefore unnecessary.
XIC <- mcq.drop.proteins.with.few.peptides(XIC, npep=2)
# Display of a summary of the 'XIC' object after removal of shared peptides
summary(XIC)
#****************************************************************************************************************
# 3.3.4. Imputing missing peptide-mz intensities
#****************************************************************************************************************
# Imputing missing peptide-mz intensities. No imputation is made for peptide-mz intensity when a protein is completely absent from a sample.
XIC <- mcq.compute.peptide.imputation(XIC, method="irmi")
############# Intensity profiles for the peptide_mz belonging to the same protein
# Display of the graph on the screen for 10 proteins
mcq.plot.peptide.intensity(XIC, showImputed = TRUE, nprot=10, log=TRUE)
# Exporting the graphe in a .pdf file in '/path/to/my_working_directory'
mcq.plot.peptide.intensity(XIC, showImputed = TRUE ,nprot=10, file="imputed_peptide_intensity_with_imputation.pdf", log=TRUE)
#****************************************************************************************************************
# 3.3.5. Computing protein abundances
#****************************************************************************************************************
############# Computing protein abundances
# Computation of the protein abundances as the sum of the peptide intensities
XICAB <- mcq.compute.protein.abundance(XIC, method="sum")
# Creating a table containing the protein abundances in a 'compare' format (one sample = one column)
ABUNDANCE_TABLE <- mcq.get.compar(XICAB)
# Display of the first lines of the table
head(ABUNDANCE_TABLE)
#****************************************************************************************************************
# 3.3.6. Imputing missing protein abundances
#****************************************************************************************************************
############# Imputing missing protein abundances by the minimum abundance obtained for each protein in the whole experiment
XICAB <- mcq.compute.protein.imputation(XICAB)
#****************************************************************************************************************
# 3.3.7. Overview of the protein abundance data
#****************************************************************************************************************
############# PCA representation
# PCA using a factor or a combination of factors as individuals (in this case, average abundances are computed for the factor or for the combination of factors)
XICAB_PCA <- mcq.compute.pca(XICAB, flist=c("jour", "zone"))
# Display of the PCA on the screen. Possibility to modify the 'factorToColor' and 'labels' arguments to change the colors and the labels of the individuals represented on the PCA.
mcq.plot.pca(XICAB_PCA, factorToColor=c("jour", "zone"), labels =c("jour", "zone"), tagType="both", labelSize=3)
# Exporting the PCA graphs in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pca(XICAB_PCA, factorToColor=c("jour", "zone"), labels =c("jour", "zone"), tagType="both", labelSize=3, file="XICAB_PCA_graphs.pdf")
# Export of the individual coordinates and components in '/path/to/my_working_directory'
mcq.write.compar(XICAB_PCA, file="XICAB_PCA.tsv")
############# Plotting a heatmap
# Display of the graph on the screen. Possibility to change the method of aggregation ('method' argument) and the method to compute the distances ('distfun' argument).
mcq.plot.heatmap(XICAB, flist=c("jour", "zone"), factorToColor="jour")
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.heatmap(XICAB, flist=c("jour", "zone"), factorToColor="jour", file="XICAB_heatmap.pdf")
# Exporting the graph in a .pdf file in '/path/to/my_working_directory' and the protein list to write
heatmap.prot.list <- mcq.plot.heatmap(XICAB, flist=c("jour", "zone"), factorToColor="jour", file="XICAB_heatmap.pdf", getProtlist =TRUE)
# Exporting the protein list in a .txt or .csv file in '/path/to/my_working_directory'
mcq.write.dataframe(heatmap.prot.list, file="XICAB_heatmap_proteins_list.tsv")
############# Clustering the proteins
# Cluster for the factor(s) of interest
XICAB_cluster <- mcq.compute.cluster(XICAB, flist=c("zone", "jour"), nbclust=8)
# Display of the graph on the screen
mcq.plot.cluster(XICAB_cluster)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.cluster(XICAB_cluster, file="XICAB_cluster.pdf")
# Exporting the results in .txt or .tsv which is equivalent to csv except that the text separator is a tab. The files are saved in '/path/to/my_working_directory'
mcq.write.compar(XICAB_cluster, file="XICAB_cluster_results.tsv")
#****************************************************************************************************************
# 3.3.8. Analyzing protein abundance variations
#****************************************************************************************************************
############# Removing proteins showing little abundance variations. The selection criteria is the ratio between the minimal and the maximal mean abundance values computed for a factor or a combination of factors of interest.
XICAB <- mcq.drop.low.fold.changes(XICAB, cutoff=1.5, flist=c("jour", "zone"))
############# Differential analysis
# ANOVA for the factors of interest
XICAB_ANOVA <- mcq.compute.anova(XICAB, flist=c("jour", "zone"))
# Display the distribution of the p-values for a factor of interest on the screen
mcq.plot.pvalues(XICAB_ANOVA)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pvalues(XICAB_ANOVA, file="XICAB_pvalues_distribution.pdf")
############# Analyzing the proteins showing significant variations
# Retrieving the proteins showing significant abundance variations for a given factor
XICAB_PROTEINS_selected <- mcq.select.pvalues(XICAB_ANOVA, padjust=TRUE, 0.01, flist="jour")
# Retrieving of the abundance data for the selected proteins
XICAB_selected <- mcq.select.from.protlist(XICAB, XICAB_PROTEINS_selected)
# Tukey test
# WARNING : Verify your packageVersion of agricolae (with the R function packageVersion("agricolae")), it must be the 1.2.9 version
#(the URL provide in the PAPPSO site it's an unstable version, waiting for the stable one)
XICAB_selected_tukey <- mcq.compute.tukey(XICAB_ANOVA, flist="jour", protlist=XICAB_PROTEINS_selected)
# Display of the graph on the screen
mcq.plot.tukey(XICAB_selected_tukey, qprot=XICAB)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.tukey(XICAB_selected_tukey, qprot=XICAB, file="XICAB_Tukey.pdf")
# Exporting the values of Tukey test in a .tsv file
mcq.write.dataframe(XICAB_selected_tukey, file="XICAB_Tukey.tsv")
############# Plot of individual protein abundances
# Display of the graph on the screen
mcq.plot.protein.abundance(XICAB_selected, flist=c("jour", "zone"))
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.protein.abundance(XICAB_selected, flist=c("jour", "zone"), file="XICAB_plot_protein_abundances.pdf")
############# Analyzing protein abundance ratios
# Computing ratios between two levels of a factor of interest
XICAB_RATIO <- mcq.compute.fold.change(XICAB_selected, flist="jour", numerator="ss5", denominator="ss0")
# Display of the graph on the screen
mcq.plot.fold.change(XICAB_RATIO)
# Export of the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.fold.change(XICAB_RATIO, file="XICAB_RATIO_plot.pdf")
# Exporting the values of ratio in a .tsv file
mcq.write.dataframe(XICAB_RATIO, file="XICAB_RATIO_values.tsv")
#****************************************************************************************************************
# 3.3.9. Exporting results in '/path/to/my_working_directory'
#****************************************************************************************************************
# Exporting intensity data a compare 'format' (one sample = one column)
mcq.write.compar(XIC, file="XIC.tsv")
# Exporting protein abundances in a 'compare' format (one sample = one column)
mcq.write.compar(XICAB, file="XICAB.tsv")
# Exporting ANOVA results (only p-values) in a long format
mcq.write.dataframe(XICAB_ANOVA, file="XICAB_ANOVA.tsv")
# Exporting proteins abundances together with ANOVA results
mcq.write.merge(XICAB, XICAB_ANOVA, file="XICAB_ANOVA_merged.tsv",columnID = "msrunfile")
# Exporting protein abundances together with the ANOVA results for the proteins showing significant variations
XICAB_ANOVA_selected <- mcq.select.from.protlist(XICAB_ANOVA, XICAB_PROTEINS_selected)
mcq.write.merge(XICAB_selected, XICAB_ANOVA_selected, file="XICAB_ANOVA_selected_merged.tsv", columnID = "msrunfile")
#********************************************************
#--------------------------------------------------------
# 3.4. Analyzing peak counting data (semi-quantitative variations)
# #--------------------------------------------------------
#********************************************************
#****************************************************************************************************************
# 3.4.1. Filtering data to remove peptides-mz with dubious chromatographic data
#****************************************************************************************************************
# Removing peptides-mz showing too much variations of their retention time because they may be derived from misidentification. Use the plot produced by the mcq.plot.rt.variability function to decide on the cut-off to use (e.g. here, 20 seconds)
XICPC <- mcq.drop.variable.rt(XICRAW, cutoff=20)
# Removing very large chromatographic peaks. Use the plot produced by the mcq.plot.peak.width function to decide on the cut-off to use (e.g. here, 100 seconds)
XICPC <- mcq.drop.wide.peaks(XICPC, cutoff=200)
# Display of a summary of the 'XICPC' object after removal of dubious peptides
summary(XICPC)
#****************************************************************************************************************
# 3.4.2. Counting peaks and removing proteins showing low numbers of peaks
#****************************************************************************************************************
# Counting the number of 'peaks' which is used as a proxy for protein abundance
PC <- mcq.compute.peak.counting(XICPC)
# Removing proteins showing low numbers of peaks (< 3) in all the injections (the protein abundances are not reliable when the number of peaks is less than 3)
# For an example, if cutoff = 3, all the proteins quantified with only 0, 1 or 2 peaks in each of the injections will be removed.
PC <- mcq.drop.low.counts(PC, cutoff=3)
#****************************************************************************************************************
# 3.4.3. Analyzing protein abundance variations
#****************************************************************************************************************
############# Differential analysis
# Removing proteins showing few abundance variations for a factor of interest (ratio < 1.5)
PC <- mcq.drop.low.fold.changes(PC, cutoff=1.5, flist=c("jour", "zone"))
# ANOVA for the factors of interest. ANOVA is based on a generalized linear model using Poisson distribution. If there are several factors of interest, interaction term(s) automatically included in the model.
# Run anova without interaction term, if you want to perform tukey test with counting data
PC_ANOVA <- mcq.compute.anova(PC,flist=c("jour", "zone"), inter=FALSE)
# Display the distribution of the p-values for the factor(s) declared in ANOVA on the screen
mcq.plot.pvalues(PC_ANOVA, width=0.005)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.pvalues(PC_ANOVA, file="PC_pvalues_distribution.pdf")
############# Analysis of the proteins showing significant variations
# Retrieving ANOVA results for the proteins showing significant abundance variations for a given factor
PC_PROTEINS_selected <- mcq.select.pvalues(PC_ANOVA, padjust=TRUE, alpha=0.05, flist="jour")
# Retriving the abundance data for the selected proteins
PC_selected <- mcq.select.from.protlist(PC, protlist=PC_PROTEINS_selected)
# Tukey test
# WARNING : Verify your packageVersion of agricolae (with the R function packageVersion("agricolae")), it must be the 1.2.9 version
#(the URL provide in the PAPPSO site it's an unstable version, waiting for the stable one)
PC_selected_tukey <- mcq.compute.tukey(PC_ANOVA, flist="jour", protlist=PC_PROTEINS_selected)
# Display of the graph on the screen
mcq.plot.tukey(PC_selected_tukey, qprot=PC)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.tukey(PC_selected_tukey, qprot=PC, file="PC_Tukey.pdf")
# Exporting the values of Tukey testin a .tsv file
mcq.write.dataframe(PC_selected_tukey, file="PC_Tukey.tsv")
############# Plot of individual protein abundances
# Display of the graph on the screen for all the proteins
mcq.plot.protein.abundance(PC_selected, flist=c("jour", "zone"))
# Export of the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.protein.abundance(PC_selected, flist=c("jour", "zone"), file="PC_plot_protein_abundances.pdf")
############# Ratio
# Computing ratios
PC_RATIO <- mcq.compute.fold.change(PC_selected, flist="jour", numerator="ss5", denominator="ss0")
# Display of the graph on the screen
mcq.plot.fold.change(PC_RATIO)
# Exporting the graph in a .pdf file in '/path/to/my_working_directory'
mcq.plot.fold.change(PC_RATIO, file="PC_RATIO_plot.pdf")
# Exporting the values of ratio in a .tsv file
mcq.write.dataframe(PC_RATIO, file="PC_RATIO_values.tsv")
#****************************************************************************************************************
# 3.4.4. Exporting the results in '/path/to/my_working_directory'
#****************************************************************************************************************
# Exporting proteins abundances in a 'compare' format (one sample = one column)
mcq.write.compar(PC, file="PC.tsv")
# Exporting ANOVA results (only p-values) in a 'compare' format (one sample = one column)
mcq.write.dataframe(PC_ANOVA, file="PC_ANOVA.tsv")
# Exporting peak counts together with ANOVA results
mcq.write.merge(PC, PC_ANOVA, file="PC_anova_merged.tsv", columnID = "msrunfile")
# Exporting peak counts together with ANOVA results for the proteins showing significant variations
PC_ANOVA_selected <- mcq.select.from.protlist(PC_ANOVA, protlist=PC_PROTEINS_selected)
mcq.write.merge(PC_selected, PC_ANOVA_selected, file="PC_ANOVA_selected_merged.tsv", columnID = "msrunfile")
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
#****************************************************** 4. Exporting a synthesis
#**************************************************************************************************************************************************
#**************************************************************************************************************************************************
# Exporting synthesis table containing the results of spectral counting, XIC and peak counting analyses
mcq.write.synthesis(SC, SC_ANOVA, PC, PC_ANOVA, XICAB, XICAB_ANOVA, file="Synthesis_tab.tsv")