% Entry 760. The publication month is given as a standard BibTeX month macro;
% the day of the month is kept in the non-standard `day` field.
@article{760,
  title    = {Novel genes and sex differences in {COVID-19} severity},
  journal  = {Human Molecular Genetics},
  year     = {2022},
  month    = jun,
  day      = {16},
  abstract = {

Here we describe the results of a genome-wide study conducted in 11 939 COVID-19 positive cases with an extensive clinical information that were recruited from 34 hospitals across Spain (SCOURGE consortium). In sex-disaggregated genome-wide association studies for COVID-19 hospitalization, genome-wide significance ($p < 5\times10^{-8}$) was crossed for variants in 3p21.31 and 21q22.11 loci only among males ($p = 1.3\times10^{-22}$ and $p = 8.1\times10^{-12}$, respectively), and for variants in 9q21.32 near TLE1 only among females ($p = 4.4\times10^{-8}$). In a second phase, results were combined with an independent Spanish cohort (1598 COVID-19 cases and 1068 population controls), revealing in the overall analysis two novel risk loci in 9p13.3 and 19q13.12, with fine-mapping prioritized variants functionally associated with AQP3 ($p = 2.7\times10^{-8}$) and ARHGAP33 ($p = 1.3\times10^{-8}$), respectively. The meta-analysis of both phases with four European studies stratified by sex from the Host Genetics Initiative confirmed the association of the 3p21.31 and 21q22.11 loci predominantly in males and replicated a recently reported variant in 11p13 (ELF5, $p = 4.1\times10^{-8}$). Six of the COVID-19 HGI discovered loci were replicated and an HGI-based genetic risk score predicted the severity strata in SCOURGE. We also found more SNP-heritability and larger heritability differences by age ($<$60 or $\geq$60~years) among males than among females. Parallel genome-wide screening of inbreeding depression in SCOURGE also showed an effect of homozygosity in COVID-19 hospitalization and severity and this effect was stronger among older males. In summary, new candidate genes for COVID-19 severity and evidence supporting genetic disparities among sexes are provided.

}, issn = {1460-2083}, doi = {10.1093/hmg/ddac132}, author = {Cruz, Raquel and Almeida, Silvia Diz-de and Heredia, Miguel L{\'o}pez and Quintela, In{\'e}s and Ceballos, Francisco C and Pita, Guillermo and Lorenzo-Salazar, Jos{\'e} M and Gonz{\'a}lez-Montelongo, Rafaela and Gago-Dom{\'\i}nguez, Manuela and Porras, Marta Sevilla and Casta{\~n}o, Jair Antonio Tenorio and Nevado, Juli{\'a}n and Aguado, Jose Mar{\'\i}a and Aguilar, Carlos and Aguilera-Albesa, Sergio and Almadana, Virginia and Almoguera, Berta and Alvarez, Nuria and Andreu-Bernabeu, {\'A}lvaro and Arana-Arri, Eunate and Arango, Celso and Arranz, Mar{\'\i}a J and Artiga, Maria-Jesus and Baptista-Rosas, Ra{\'u}l C and Barreda-S{\'a}nchez, Mar{\'\i}a and Belhassen-Garcia, Moncef and Bezerra, Joao F and Bezerra, Marcos A C and Boix-Palop, Luc{\'\i}a and Bri{\'o}n, Maria and Brugada, Ram{\'o}n and Bustos, Matilde and Calder{\'o}n, Enrique J and Carbonell, Cristina and Castano, Luis and Castelao, Jose E and Conde-Vicente, Rosa and Cordero-Lorenzana, M Lourdes and Cortes-Sanchez, Jose L and Corton, Marta and Darnaude, M Teresa and De Martino-Rodr{\'\i}guez, Alba and Campo-P{\'e}rez, Victor and Bustamante, Aranzazu Diaz and Dom{\'\i}nguez-Garrido, Elena and Luchessi, Andr{\'e} D and Eir{\'o}s, Roc{\'\i}o and Sanabria, Gladys Mercedes Estigarribia and Fari{\~n}as, Mar{\'\i}a Carmen and Fern{\'a}ndez-Robelo, Ux{\'\i}a and Fern{\'a}ndez-Rodr{\'\i}guez, Amanda and Fern{\'a}ndez-Villa, Tania and Gil-Fournier, Bel{\'e}n and G{\'o}mez-Arrue, Javier and {\'A}lvarez, Beatriz Gonz{\'a}lez and Quir{\'o}s, Fernan Gonzalez Bernaldo and Gonz{\'a}lez-Pe{\~n}as, Javier and Guti{\'e}rrez-Bautista, Juan F and Herrero, Mar{\'\i}a Jos{\'e} and Herrero-Gonzalez, Antonio and Jimenez-Sousa, Mar{\'\i}a A and Lattig, Mar{\'\i}a Claudia and Borja, Anabel Liger and Lopez-Rodriguez, Rosario and Mancebo, Esther and Mart{\'\i}n-L{\'o}pez, Caridad and Mart{\'\i}n, Vicente and Martinez-Nieto, Oscar and Martinez-Lopez, Iciar and 
Martinez-Resendez, Michel F and Martinez-Perez, {\'A}ngel and Mazzeu, Juliana A and Mac{\'\i}as, Eleuterio Merayo and Minguez, Pablo and Cuerda, Victor Moreno and Silbiger, Vivian N and Oliveira, Silviene F and Ortega-Paino, Eva and Parellada, Mara and Paz-Artal, Estela and Santos, Ney P C and P{\'e}rez-Matute, Patricia and Perez, Patricia and P{\'e}rez-Tom{\'a}s, M Elena and Perucho, Teresa and Pinsach-Abuin, Mel Lina and Pompa-Mera, Ericka N and Porras-Hurtado, Gloria L and Pujol, Aurora and Le{\'o}n, Soraya Ramiro and Resino, Salvador and Fernandes, Marianne R and Rodr{\'\i}guez-Ruiz, Emilio and Rodriguez-Artalejo, Fernando and Rodriguez-Garcia, Jos{\'e} A and Ruiz-Cabello, Francisco and Ruiz-Hornillos, Javier and Ryan, Pablo and Soria, Jos{\'e} Manuel and Souto, Juan Carlos and Tamayo, Eduardo and Tamayo-Velasco, Alvaro and Taracido-Fernandez, Juan Carlos and Teper, Alejandro and Torres-Tobar, Lilian and Urioste, Miguel and Valencia-Ramos, Juan and Y{\'a}{\~n}ez, Zuleima and Zarate, Ruth and Nakanishi, Tomoko and Pigazzini, Sara and Degenhardt, Frauke and Butler-Laporte, Guillaume and Maya-Miles, Douglas and Bujanda, Luis and Bouysran, Youssef and Palom, Adriana and Ellinghaus, David and Mart{\'\i}nez-Bueno, Manuel and Rolker, Selina and Amitrano, Sara and Roade, Luisa and Fava, Francesca and Spinner, Christoph D and Prati, Daniele and Bernardo, David and Garc{\'\i}a, Federico and Darcis, Gilles and Fern{\'a}ndez-Cadenas, Israel and Holter, Jan Cato and Banales, Jesus M and Frithiof, Robert and Duga, Stefano and Asselta, Rosanna and Pereira, Alexandre C and Romero-G{\'o}mez, Manuel and Nafr{\'\i}a-Jim{\'e}nez, Beatriz and Hov, Johannes R and Migeotte, Isabelle and Renieri, Alessandra and Planas, Anna M and Ludwig, Kerstin U and Buti, Maria and Rahmouni, Souad and Alarc{\'o}n-Riquelme, Marta E and Schulte, Eva C and Franke, Andre and Karlsen, Tom H and Valenti, Luca and Zeberg, Hugo and Richards, Brent and Ganna, Andrea and Boada, Merc{\`e} and Rojas, Itziar and 
Ruiz, Agust{\'\i}n and S{\'a}nchez, Pascual and Real, Luis Miguel and
Guill{\'e}n-Navarro, Encarna and Ayuso, Carmen and Gonz{\'a}lez-Neira, Anna and
Riancho, Jos{\'e} A and Rojas-Martinez, Augusto and Flores, Carlos and
Lapunzina, Pablo and Carracedo, {\'A}ngel},
}

% Entry 701. Acronym and proper noun in the title are brace-protected so that
% sentence-casing styles keep their capitals; the day of publication is kept in
% the non-standard `day` field.
@article{701,
  title    = {{CSVS}, a crowdsourcing database of the {Spanish} population genetic variability},
  journal  = {Nucleic Acids Research},
  volume   = {49},
  year     = {2021},
  month    = jan,
  day      = {8},
  pages    = {D1130--D1137},
  abstract = {

The knowledge of the genetic variability of the local population is of utmost importance in personalized medicine and has been revealed as a critical factor for the discovery of new disease variants. Here, we present the Collaborative Spanish Variability Server (CSVS), which currently contains more than 2000 genomes and exomes of unrelated Spanish individuals. This database has been generated in a collaborative crowdsourcing effort collecting sequencing data produced by local genomic projects and for other purposes. Sequences have been grouped by ICD10 upper categories. A web interface allows querying the database removing one or more ICD10 categories. In this way, aggregated counts of allele frequencies of the pseudo-control Spanish population can be obtained for diseases belonging to the category removed. Interestingly, in addition to pseudo-control studies, some population studies can be made, as, for example, prevalence of pharmacogenomic variants, etc. In addition, this genomic data has been used to define the first Spanish Genome Reference Panel (SGRP1.0) for imputation. This is the first local repository of variability entirely produced by a crowdsourcing effort and constitutes an example for future initiatives to characterize local variability worldwide. CSVS is also part of the GA4GH Beacon network. CSVS can be accessed at: http://csvs.babelomics.org/.

},
  keywords = {Alleles, Chromosome Mapping, Crowdsourcing, Databases, Genetic, Exome, Gene Frequency, Genetic Variation, Genetics, Population, Genome, Human, Genomics, Humans, Internet, Precision Medicine, Software, Spain},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkaa794},
  author   = {Pe{\~n}a-Chilet, Maria and
              Rold{\'a}n, Gema and
              Perez-Florido, Javier and
              Ortuno, Francisco M and
              Carmona, Rosario and
              Aquino, Virginia and
              L{\'o}pez-L{\'o}pez, Daniel and
              Loucera, Carlos and
              Fernandez-Rueda, Jose L and
              Gallego, Asunci{\'o}n and
              Garcia-Garcia, Francisco and
              Gonz{\'a}lez-Neira, Anna and
              Pita, Guillermo and
              N{\'u}{\~n}ez-Torres, Roc{\'\i}o and
              Santoyo-L{\'o}pez, Javier and
              Ayuso, Carmen and
              Minguez, Pablo and
              Avila-Fernandez, Almudena and
              Corton, Marta and
              Moreno-Pelayo, Miguel {\'A}ngel and
              Morin, Mat{\'\i}as and
              Gallego-Martinez, Alvaro and
              Lopez-Escamez, Jose A and
              Borrego, Salud and
              Anti{\~n}olo, Guillermo and
              Amigo, Jorge and
              Salgado-Garrido, Josefa and
              Pasalodos-Sanchez, Sara and
              Morte, Beatriz and
              Carracedo, {\'A}ngel and
              Alonso, {\'A}ngel and
              Dopazo, Joaquin},
}

% Entry 468. The page range is written with a double hyphen and the abbreviated
% end page (D494-502) is expanded.
@article{468,
  title    = {{PTMcode} v2: a resource for functional associations of post-translational modifications within and between proteins},
  journal  = {Nucleic Acids Research},
  volume   = {43},
  year     = {2015},
  month    = jan,
  pages    = {D494--D502},
  abstract = {

The post-translational regulation of proteins is mainly driven by two molecular events, their modification by several types of moieties and their interaction with other proteins. These two processes are interdependent and together are responsible for the function of the protein in a particular cell state. Several databases focus on the prediction and compilation of protein-protein interactions (PPIs) and no less on the collection and analysis of protein post-translational modifications (PTMs), however, there are no resources that concentrate on describing the regulatory role of PTMs in PPIs. We developed several methods based on residue co-evolution and proximity to predict the functional associations of pairs of PTMs that we apply to modifications in the same protein and between two interacting proteins. In order to make data available for understudied organisms, PTMcode v2 (http://ptmcode.embl.de) includes a new strategy to propagate PTMs from validated modified sites through orthologous proteins. The second release of PTMcode covers 19 eukaryotic species from which we collected more than 300,000 experimentally verified PTMs (>1,300,000 propagated) of 69 types extracting the post-translational regulation of >100,000 proteins and >100,000 interactions. In total, we report 8 million associations of PTMs regulating single proteins and over 9.4 million interplays tuning PPIs.

},
  keywords = {Databases, Protein, Internet, Protein Interaction Mapping, Protein Processing, Post-Translational},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gku1081},
  author   = {Minguez, Pablo and
              Letunic, Ivica and
              Parca, Luca and
              Garc{\'\i}a-Alonso, Luz and
              Dopazo, Joaquin and
              Huerta-Cepas, Jaime and
              Bork, Peer},
}

% Entries 565 and 1093 carry the same DOI (10.1186/s12918-014-0121-3) and appear
% to be two records of one article. Both keys are kept so that existing
% citations of either key keep resolving.
@article{565,
  title   = {Understanding disease mechanisms with models of signaling pathway activities},
  journal = {BMC Systems Biology},
  volume  = {8},
  year    = {2014},
  month   = oct,
  pages   = {121},
  doi     = {10.1186/s12918-014-0121-3},
  author  = {Sebasti{\'a}n-Leon, Patricia and
             Vidal, Enrique and
             Minguez, Pablo and
             Conesa, Ana and
             Tarazona, Sonia and
             Amadoz, Alicia and
             Armero, Carmen and
             Salavert Torres, Francisco and
             Vidal-Puig, Antonio and
             Montaner, David and
             Dopazo, Joaquin},
}

@article{1093,
  title    = {Understanding disease mechanisms with models of signaling pathway activities},
  journal  = {BMC Systems Biology},
  volume   = {8},
  year     = {2014},
  month    = oct,
  day      = {25},
  pages    = {121},
  abstract = {

BACKGROUND: Understanding the aspects of the cell functionality that account for disease or drug action mechanisms is one of the main challenges in the analysis of genomic data and is on the basis of the future implementation of precision medicine.

RESULTS: Here we propose a simple probabilistic model in which signaling pathways are separated into elementary sub-pathways or signal transmission circuits (which ultimately trigger cell functions) and then transforms gene expression measurements into probabilities of activation of such signal transmission circuits. Using this model, differential activation of such circuits between biological conditions can be estimated. Thus, circuit activation statuses can be interpreted as biomarkers that discriminate among the compared conditions. This type of mechanism-based biomarkers accounts for cell functional activities and can easily be associated to disease or drug action mechanisms. The accuracy of the proposed model is demonstrated with simulations and real datasets.

CONCLUSIONS: The proposed model provides detailed information that enables the interpretation disease mechanisms as a consequence of the complex combinations of altered gene expression values. Moreover, it offers a framework for suggesting possible ways of therapeutic intervention in a pathologically perturbed system.

},
  keywords = {Disease mechanism, pathway, signalling, Systems biology},
  issn     = {1752-0509},
  doi      = {10.1186/s12918-014-0121-3},
  url      = {http://www.biomedcentral.com/1752-0509/8/121/abstract},
  author   = {Sebasti{\'a}n-Leon, Patricia and
              Vidal, Enrique and
              Minguez, Pablo and
              Conesa, Ana and
              Tarazona, Sonia and
              Amadoz, Alicia and
              Armero, Carmen and
              Salavert, Francisco and
              Vidal-Puig, Antonio and
              Montaner, David and
              Dopazo, Joaqu{\'\i}n},
}

% Entry 512. The day of publication is kept in the non-standard `day` field.
@article{512,
  title    = {Discovering the hidden sub-network component in a ranked list of genes or proteins derived from genomic experiments},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  month    = nov,
  day      = {1},
  pages    = {e158},
  abstract = {

Genomic experiments (e.g. differential gene expression, single-nucleotide polymorphism association) typically produce ranked list of genes. We present a simple but powerful approach which uses protein-protein interaction data to detect sub-networks within such ranked lists of genes or proteins. We performed an exhaustive study of network parameters that allowed us concluding that the average number of components and the average number of nodes per component are the parameters that best discriminate between real and random networks. A novel aspect that increases the efficiency of this strategy in finding sub-networks is that, in addition to direct connections, also connections mediated by intermediate nodes are considered to build up the sub-networks. The possibility of using of such intermediate nodes makes this approach more robust to noise. It also overcomes some limitations intrinsic to experimental designs based on differential expression, in which some nodes are invariant across conditions. The proposed approach can also be used for candidate disease-gene prioritization. Here, we demonstrate the usefulness of the approach by means of several case examples that include a differential expression analysis in Fanconi Anemia, a genome-wide association study of bipolar disorder and a genome-scale study of essentiality in cancer genes. An efficient and easy-to-use web interface (available at http://www.babelomics.org) based on HTML5 technologies is also provided to run the algorithm and represent the network.

},
  keywords = {Bipolar Disorder, Fanconi Anemia, Gene Regulatory Networks, Genes, Neoplasm, Genome-Wide Association Study, Genomics, Humans, Protein Interaction Mapping},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks699},
  author   = {Garc{\'\i}a-Alonso, Luz and
              Alonso, Roberto and
              Vidal, Enrique and
              Amadoz, Alicia and
              De Maria, Alejandro and
              Minguez, Pablo and
              Medina, Ignacio and
              Dopazo, Joaquin},
}

% Entry 21408226. The source record had month = {2011}, i.e. the year repeated
% in the month field with no actual month, so the month field is omitted.
@article{21408226,
  title    = {Assessing the biological significance of gene expression signatures and co-expression modules by studying their network properties},
  journal  = {PLoS ONE},
  volume   = {6},
  year     = {2011},
  pages    = {e17474},
  abstract = {

Microarray experiments have been extensively used to define signatures, which are sets of genes that can be considered markers of experimental conditions (typically diseases). Paradoxically, in spite of the apparent functional role that might be attributed to such gene sets, signatures do not seem to be reproducible across experiments. Given the close relationship between function and protein interaction, network properties can be used to study to what extent signatures are composed of genes whose resulting proteins show a considerable level of interaction (and consequently a putative common functional role). We have analysed 618 signatures and 507 modules of co-expression in cancer looking for significant values of four main protein-protein interaction (PPI) network parameters: connection degree, cluster coefficient, betweenness and number of components. A total of 3904 gene ontology (GO) modules, 146 KEGG pathways, and 263 Biocarta pathways have been used as functional modules of reference. Co-expression modules found in microarray experiments display a high level of connectivity, similar to the one shown by conventional modules based on functional definitions (GO, KEGG and Biocarta). A general observation for all the classes studied is that the networks formed by the modules improve their topological parameters when an external protein is allowed to be introduced within the paths (up to the 70\% of GO modules show network parameters beyond the random expectation). This fact suggests that functional definitions are incomplete and some genes might still be missing. Conversely, signatures are clearly not capturing the altered functions in the corresponding studies. This is probably because the way in which the genes have been selected in the signatures is too conservative. 
These results suggest that gene selection methods which take into account relationships among genes should be superior to methods that assume independence among genes outside their functional contexts.

},
  doi      = {10.1371/journal.pone.0017474},
  url      = {http://www.plosone.org/article/info\%3Adoi\%2F10.1371\%2Fjournal.pone.0017474},
  author   = {Minguez, Pablo and
              Dopazo, Joaquin},
}

% Entry 552. The page range is written with a double hyphen.
@article{552,
  title    = {Functional genomics and networks: new approaches in the extraction of complex gene modules},
  journal  = {Expert Review of Proteomics},
  volume   = {7},
  year     = {2010},
  month    = feb,
  pages    = {55--63},
  abstract = {

The engine that makes the cell work is made of an intricate network of molecular interactions. Nowadays, the elements and relationships of this complex network can be studied with several types of high-throughput techniques. The dream of having a global picture of the cell from different perspectives that can jointly explain cell behavior is, at least technically, feasible. However, this task can only be accomplished by filling the gap between data and information. The availability of methods capable of accurately managing, integrating and analyzing the results from these experiments is crucial for this purpose. Here, we review the new challenges raised by the availability of different genomic data, as well as the new proposals presented to cope with the increasing data complexity. Special emphasis is given to approaches that explore the transcriptome trying to describe the modules of genes that account for the traits studied.

},
  keywords = {Gene Expression Regulation, Gene Regulatory Networks, Genomics, Protein Binding, Proteins, Systems biology},
  issn     = {1744-8387},
  doi      = {10.1586/epr.09.103},
  author   = {Minguez, Pablo and
              Dopazo, Joaquin},
}

% Entry 808. The doi field holds the bare DOI (no "doi:" prefix); styles add
% the resolver or label themselves.
@article{808,
  title   = {Selection upon Genome Architecture: Conservation of Functional Neighborhoods with Changing Genes},
  journal = {PLoS Computational Biology},
  volume  = {6},
  number  = {10},
  year    = {2010},
  pages   = {e1000953},
  doi     = {10.1371/journal.pcbi.1000953},
  url     = {http://www.ploscompbiol.org/article/info:doi/10.1371/journal.pcbi.1000953},
  author  = {Al-Shahrour, F{\'a}tima and
             Minguez, Pablo and
             Marqu{\'e}s-Bonet, Tom{\'a}s and
             Gazave, Elodie and
             Navarro, Arcadi and
             Dopazo, Joaquin},
}

% Entry 582. The day of publication is kept in the non-standard `day` field.
@article{582,
  title    = {Gene set internal coherence in the context of functional profiling},
  journal  = {BMC Genomics},
  volume   = {10},
  year     = {2009},
  month    = apr,
  day      = {27},
  pages    = {197},
  abstract = {

BACKGROUND: Functional profiling methods have been extensively used in the context of high-throughput experiments and, in particular, in microarray data analysis. Such methods use available biological information to define different types of functional gene modules (e.g. gene ontology -GO-, KEGG pathways, etc.) whose representation in a pre-defined list of genes is further studied. In the most popular type of microarray experimental designs (e.g. up- or down-regulated genes, clusters of co-expressing genes, etc.) or in other genomic experiments (e.g. Chip-on-chip, epigenomics, etc.) these lists are composed by genes with a high degree of co-expression. Therefore, an implicit assumption in the application of functional profiling methods within this context is that the genes corresponding to the modules tested are effectively defining sets of co-expressing genes. Nevertheless not all the functional modules are biologically coherent entities in terms of co-expression, which will eventually hinder its detection with conventional methods of functional enrichment.

RESULTS: Using a large collection of microarray data we have carried out a detailed survey of internal correlation in GO terms and KEGG pathways, providing a coherence index to be used for measuring functional module co-regulation. An unexpected low level of internal correlation was found among the modules studied. Only around 30\% of the modules defined by GO terms and 57\% of the modules defined by KEGG pathways display an internal correlation higher than the expected by chance. This information on the internal correlation of the genes within the functional modules can be used in the context of a logistic regression model in a simple way to improve their detection in gene expression experiments.

CONCLUSION: For the first time, an exhaustive study on the internal co-expression of the most popular functional categories has been carried out. Interestingly, the real level of coexpression within many of them is lower than expected (or even inexistent), which will preclude its detection by means of most conventional functional profiling methods. If the gene-to-function correlation information is used in functional profiling methods, the results obtained improve the ones obtained by conventional enrichment methods.

},
  keywords = {Algorithms, Breast Neoplasms, Carcinoma, Intraductal, Noninfiltrating, Computational Biology, Databases, Nucleic Acid, Female, Gene Expression Profiling, Genomics, Humans, Oligonucleotide Array Sequence Analysis, Papillomavirus Infections, Reproducibility of Results},
  issn     = {1471-2164},
  doi      = {10.1186/1471-2164-10-197},
  author   = {Montaner, David and
              Minguez, Pablo and
              Al-Shahrour, F{\'a}tima and
              Dopazo, Joaquin},
}

% Entry 586. The abbreviated page range W109-14 is expanded and written with a
% double hyphen.
@article{586,
  title    = {{SNOW}, a web-based tool for the statistical analysis of protein-protein interaction networks},
  journal  = {Nucleic Acids Research},
  volume   = {37},
  year     = {2009},
  month    = jul,
  pages    = {W109--W114},
  abstract = {

Understanding the structure and the dynamics of the complex intercellular network of interactions that contributes to the structure and function of a living cell is one of the main challenges of today{\textquoteright}s biology. SNOW inputs a collection of protein (or gene) identifiers and, by using the interactome as scaffold, draws the connections among them, calculates several relevant network parameters and, as a novelty among the rest of tools of its class, it estimates their statistical significance. The parameters calculated for each node are: connectivity, betweenness and clustering coefficient. It also calculates the number of components, number of bicomponents and articulation points. An interactive network viewer is also available to explore the resulting network. SNOW is available at http://snow.bioinfo.cipf.es.

},
  keywords = {Computer Graphics, Data Interpretation, Statistical, Databases, Protein, Humans, Internet, Protein Interaction Mapping, Software},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkp402},
  author   = {Minguez, Pablo and
              G{\"o}tz, Stefan and
              Montaner, David and
              Al-Shahrour, F{\'a}tima and
              Dopazo, Joaquin},
}

% PabloMinguez05192009 has the same title and DOI (10.1093/nar/gkp402) as entry
% 586 and appears to be a second record of that article. The key is kept so
% that existing citations keep resolving.
@article{PabloMinguez05192009,
  title    = {{SNOW}, a web-based tool for the statistical analysis of protein-protein interaction networks},
  journal  = {Nucleic Acids Research},
  volume   = {37},
  year     = {2009},
  pages    = {W109--W114},
  abstract = {

Understanding the structure and the dynamics of the complex intercellular network of interactions that contributes to the structure and function of a living cell is one of the main challenges of today{\textquoteright}s biology. SNOW inputs a collection of protein (or gene) identifiers and, by using the interactome as scaffold, draws the connections among them, calculates several relevant network parameters and, as a novelty among the rest of tools of its class, it estimates their statistical significance. The parameters calculated for each node are: connectivity, betweenness and clustering coefficient. It also calculates the number of components, number of bicomponents and articulation points. An interactive network viewer is also available to explore the resulting network. SNOW is available at http://snow.bioinfo.cipf.es.

},
  keywords = {interactome, network, snow},
  doi      = {10.1093/nar/gkp402},
  url      = {http://nar.oxfordjournals.org/content/early/2009/05/19/nar.gkp402.full},
  author   = {Minguez, Pablo and
              G{\"o}tz, Stefan and
              Montaner, David and
              Al-Shahrour, F{\'a}tima and
              Dopazo, Joaquin},
}

% Entry 593. The abbreviated page range W308-14 is expanded and written with a
% double hyphen; the day of publication is kept in the non-standard `day` field.
@article{593,
  title    = {{GEPAS}, a web-based tool for microarray data analysis and interpretation},
  journal  = {Nucleic Acids Research},
  volume   = {36},
  year     = {2008},
  month    = jul,
  day      = {1},
  pages    = {W308--W314},
  abstract = {

Gene Expression Profile Analysis Suite (GEPAS) is one of the most complete and extensively used web-based packages for microarray data analysis. During its more than 5 years of activity it has continuously been updated to keep pace with the state-of-the-art in the changing microarray data analysis arena. GEPAS offers diverse analysis options that include well established as well as novel algorithms for normalization, gene selection, class prediction, clustering and functional profiling of the experiment. New options for time-course (or dose-response) experiments, microarray-based class prediction, new clustering methods and new tests for differential expression have been included. The new pipeliner module allows automating the execution of sequential analysis steps by means of a simple but powerful graphic interface. An extensive re-engineering of GEPAS has been carried out which includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. GEPAS is nowadays the most quoted web tool in its field and it is extensively used by researchers of many countries and its records indicate an average usage rate of 500 experiments per day. GEPAS, is available at http://www.gepas.org.

},
  keywords = {Computer Graphics, Dose-Response Relationship, Drug, Gene Expression Profiling, Internet, Kinetics, Oligonucleotide Array Sequence Analysis, Software},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkn303},
  author   = {T{\'a}rraga, Joaqu{\'\i}n and
              Medina, Ignacio and
              Carbonell, Jos{\'e} and
              Huerta-Cepas, Jaime and
              Minguez, Pablo and
              Alloza, Eva and
              Al-Shahrour, F{\'a}tima and
              Vegas-Azc{\'a}rate, Susana and
              Goetz, Stefan and
              Escobar, Pablo and
              Garcia-Garcia, Francisco and
              Conesa, Ana and
              Montaner, David and
              Dopazo, Joaquin},
}

% Entry 605. "FatiGO" and the sentence-initial "Integration" are brace-protected
% so that sentence-casing styles do not lowercase them; the abbreviated page
% range W91-6 is expanded.
@article{605,
  title    = {{FatiGO} +: a functional profiling tool for genomic data. {Integration} of functional annotation, regulatory motifs and interaction data with microarray experiments},
  journal  = {Nucleic Acids Research},
  volume   = {35},
  year     = {2007},
  month    = jul,
  pages    = {W91--W96},
  abstract = {

The ultimate goal of any genome-scale experiment is to provide a functional interpretation of the data, relating the available information with the hypotheses that originated the experiment. Thus, functional profiling methods have become essential in diverse scenarios such as microarray experiments, proteomics, etc. We present the FatiGO+, a web-based tool for the functional profiling of genome-scale experiments, specially oriented to the interpretation of microarray experiments. In addition to different functional annotations (gene ontology, KEGG pathways, Interpro motifs, Swissprot keywords and text-mining based bioentities related to diseases and chemical compounds) FatiGO+ includes, as a novelty, regulatory and structural information. The regulatory information used includes predictions of targets for distinct regulatory elements (obtained from the Transfac and CisRed databases). Additionally FatiGO+ uses predictions of target motifs of miRNA to infer which of these can be activated or deactivated in the sample of genes studied. Finally, properties of gene products related to their relative location and connections in the interactome have also been used. Also, enrichment of any of these functional terms can be directly analysed on chromosomal coordinates. FatiGO+ can be found at: http://www.fatigoplus.org and within the Babelomics environment http://www.babelomics.org.

},
  keywords = {Amino Acid Motifs, Animals, Binding Sites, Computational Biology, Gene Expression Profiling, Genes, Genomics, Humans, Internet, Oligonucleotide Array Sequence Analysis, Programming Languages, Software, Systems Integration, Transcription Factors},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkm260},
  author   = {Al-Shahrour, F{\'a}tima and
              Minguez, Pablo and
              T{\'a}rraga, Joaqu{\'\i}n and
              Medina, Ignacio and
              Alloza, Eva and
              Montaner, David and
              Dopazo, Joaquin},
}

% Entry 606. The day of publication is kept in the non-standard `day` field.
@article{606,
  title    = {From genes to functional classes in the study of biological systems},
  journal  = {BMC Bioinformatics},
  volume   = {8},
  year     = {2007},
  month    = apr,
  day      = {3},
  pages    = {114},
  abstract = {

BACKGROUND: With the popularization of high-throughput techniques, the need for procedures that help in the biological interpretation of results has increased enormously. Recently, new procedures inspired in systems biology criteria have started to be developed.

RESULTS: Here we present FatiScan, a web-based program which implements a threshold-independent test for the functional interpretation of large-scale experiments that does not depend on the pre-selection of genes based on the multiple application of independent tests to each gene. The test implemented aims to directly test the behaviour of blocks of functionally related genes, instead of focusing on single genes. In addition, the test does not depend on the type of the data used for obtaining significance values, and consequently different types of biologically informative terms (gene ontology, pathways, functional motifs, transcription factor binding sites or regulatory sites from CisRed) can be applied to different classes of genome-scale studies. We exemplify its application in microarray gene expression, evolution and interactomics.

CONCLUSION: Methods for gene set enrichment which, in addition, are independent from the original data and experimental design constitute a promising alternative for the functional profiling of genome-scale experiments. A web server that performs the test described and other similar ones can be found at: http://www.babelomics.org.

},
  keywords = {Algorithms, Chromosome Mapping, Computer Simulation, Gene Expression Profiling, Models, Biological, Multigene Family, Signal Transduction, Software, Systems biology, User-Computer Interface},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-8-114},
  author   = {Al-Shahrour, F{\'a}tima and
              Arbiza, Leonardo and
              Dopazo, Hern{\'a}n and
              Huerta-Cepas, Jaime and
              Minguez, Pablo and
              Montaner, David and
              Dopazo, Joaquin},
}

% Entry 592. The abbreviated page range 3098-9 is expanded and written with a
% double hyphen; the day of publication is kept in the non-standard `day` field.
@article{592,
  title    = {Functional profiling of microarray experiments using text-mining derived bioentities},
  journal  = {Bioinformatics},
  volume   = {23},
  year     = {2007},
  month    = nov,
  day      = {15},
  pages    = {3098--3099},
  abstract = {

MOTIVATION: The increasing use of microarray technologies brought about a parallel demand in methods for the functional interpretation of the results. Beyond the conventional functional annotations for genes, such as gene ontology, pathways, etc. other sources of information are still to be exploited. Text-mining methods allow extracting informative terms (bioentities) with different functional, chemical, clinical, etc. meanings, that can be associated to genes. We show how to use these associations within an appropriate statistical framework and how to apply them through easy-to-use, web-based environments to the functional interpretation of microarray experiments. Functional enrichment and gene set enrichment tests using bioentities are presented.

},
  keywords = {Artificial Intelligence, Databases, Protein, Gene Expression Profiling, Information Storage and Retrieval, Natural Language Processing, Proteins, Research Design, Systems Integration},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btm445},
  author   = {Minguez, Pablo and
              Al-Shahrour, F{\'a}tima and
              Montaner, David and
              Dopazo, Joaquin}
}